<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <!-- Document metadata: character encoding, responsive viewport, page title -->
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>arXiv 每日论文精选</title>
    <!-- Third-party assets loaded from CDNs: Tailwind CSS script and the Font Awesome 4.7.0 stylesheet -->
    <script src="https://cdn.tailwindcss.com"></script>
    <link href="https://cdn.jsdelivr.net/npm/font-awesome@4.7.0/css/font-awesome.min.css" rel="stylesheet">
    <!-- Project-local stylesheet; NOTE(review): the ?v= value looks like a cache-busting stamp - confirm with the generator -->
    <link rel="stylesheet" href="static/styles.css?v=1764044820">
    <!-- Project-local Tailwind configuration; presumably defines the custom colour names (primary, accent, ...) used below - verify -->
    <script src="static/tailwind.config.js"></script>

    <style>
        /*
         * Tiered collapse styles.
         *   .collapsed-level-1  keeps the card visible but hides its .paper-details body.
         *   .collapsed-level-2  hides the whole element.
         * An ancestor carrying .expanded-all or .expanded-level-2 reveals them again
         * (see the "Expanded state" rules at the bottom of this block).
         */
        .collapsed-level-1 .paper-details {
            display: none;
        }

        .collapsed-level-2 {
            display: none !important;
        }

        /* Expand/collapse icon */
        .expand-icon {
            display: inline-block;
            width: 20px;
            text-align: center;
            margin-right: 5px;
        }

        /* Expand/collapse toggle button */
        .expand-toggle {
            cursor: pointer;
            padding: 8px 12px;
            background-color: #f3f4f6;
            border: 1px solid #e5e7eb;
            border-radius: 6px;
            margin-bottom: 16px;
            text-align: center;
            font-weight: 500;
            color: #4b5563;
            transition: all 0.2s ease;
        }

        .expand-toggle:hover {
            background-color: #e5e7eb;
        }

        /* Divider line between groups of papers */
        .papers-divider {
            height: 1px;
            background-color: #e5e7eb;
            margin: 20px 0;
            position: relative;
        }

        /* Clickable label centred on top of the divider line */
        .papers-divider-label {
            position: absolute;
            left: 50%;
            top: 50%;
            transform: translate(-50%, -50%);
            background-color: white;
            padding: 0 12px;
            color: #9ca3af;
            font-size: 14px;
            cursor: pointer;
        }

        .papers-divider-label:hover {
            color: #4b5563;
        }

        /* Expanded state: reveal the details of level-1 collapsed cards */
        .expanded-all .collapsed-level-1 .paper-details {
            display: block;
        }

        /*
         * Expanded state: reveal level-2 collapsed elements.
         * .collapsed-level-2 is hidden with `display: none !important`, and a normal
         * declaration can never override an !important one regardless of specificity,
         * so these reveal rules must be !important as well to have any effect.
         */
        .expanded-all .collapsed-level-2,
        .expanded-level-2 .collapsed-level-2 {
            display: block !important;
        }
    </style>
    </head>

<body class="bg-gray-50 font-sans text-dark">
    <!-- Sticky page header: site title, date picker and summary statistics -->
    <header class="bg-white shadow-sm sticky top-0 z-10 border-b border-gray-200">
        <div class="container mx-auto px-4 py-4">
            <div class="flex flex-col md:flex-row justify-between items-start md:items-center mb-3">
                <!-- Site title; the icon is decorative and hidden from assistive technology -->
                <div class="flex items-center">
                    <i class="fa fa-book text-primary text-xl mr-2" aria-hidden="true"></i>
                    <h1 class="text-lg md:text-xl font-bold text-gray-800">arXiv 每日论文精选</h1>
                </div>
                <div class="flex items-center mt-2 md:mt-0">
                    <span id="current-date" class="text-gray-600 text-sm">
                        <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>2025-11-25
                    </span>
                    <!-- Date picker: toggle button plus an initially hidden calendar popup -->
                    <div class="ml-3 relative" id="date-picker-container">
                        <!-- type="button" is explicit so the control can never act as a form submit -->
                        <button type="button" id="date-picker-toggle" class="bg-light border border-gray-300 text-gray-700 py-1 px-3 pr-6 rounded text-sm leading-tight focus:outline-none focus:bg-white inline-flex items-center">
                            <i class="fa fa-calendar mr-2" aria-hidden="true"></i>
                            <span id="selected-date-text">2025-11-25</span>
                            <i class="fa fa-chevron-down ml-2 text-xs" aria-hidden="true"></i>
                        </button>
                        <div id="date-picker" class="hidden absolute right-0 mt-1 bg-white border border-gray-300 rounded shadow-lg p-2 z-20 w-56">
                            <div class="flex justify-between items-center mb-2">
                                <!-- Icon-only buttons need an accessible name, supplied here via aria-label -->
                                <button type="button" id="prev-month" class="text-gray-500 hover:text-gray-700" aria-label="上个月"><i class="fa fa-chevron-left" aria-hidden="true"></i></button>
                                <h4 id="current-month">2025-11-25</h4>
                                <button type="button" id="next-month" class="text-gray-500 hover:text-gray-700" aria-label="下个月"><i class="fa fa-chevron-right" aria-hidden="true"></i></button>
                            </div>
                            <!-- Weekday column headings, Sunday first -->
                            <div class="grid grid-cols-7 gap-1 text-center text-xs mb-1">
                                <div class="text-gray-500">日</div>
                                <div class="text-gray-500">一</div>
                                <div class="text-gray-500">二</div>
                                <div class="text-gray-500">三</div>
                                <div class="text-gray-500">四</div>
                                <div class="text-gray-500">五</div>
                                <div class="text-gray-500">六</div>
                            </div>
                            <div id="calendar-grid" class="grid grid-cols-7 gap-1 text-center text-sm">
                                <!-- Calendar day cells are generated dynamically by JavaScript -->
                            </div>
                        </div>
                    </div>
                </div>
            </div>

            <!-- Summary statistics: total papers, selected papers, average score -->
            <div class="flex flex-wrap gap-4 text-sm">
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-file-text-o" aria-hidden="true"></i> 总论文数:</span>
                    <span id="total-papers" class="font-semibold text-primary">152</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-star" aria-hidden="true"></i> 精选论文数:</span>
                    <span id="selected-papers" class="font-semibold text-accent">20</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-line-chart" aria-hidden="true"></i> 平均评分:</span>
                    <span id="avg-score" class="font-semibold text-secondary">2.7</span>
                </div>
            </div>
        </div>
    </header>

    <!-- 主内容区 -->
    <main class="container mx-auto px-4 py-5">
        <!-- Filter bar: displayed-paper counter on the left, all/selected switch on the right -->
        <div class="mb-4 flex flex-col sm:flex-row justify-between items-start sm:items-center">
            <div class="text-gray-700 text-sm mb-2 sm:mb-0">
                <span id="display-count" class="font-medium">显示 152 篇论文 (共 152 篇)</span>
            </div>
            <!-- type="button" is explicit: these are in-page actions, never form submits -->
            <div class="flex space-x-2">
                <button type="button" id="show-all"
                    class="px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors">
                    全部论文
                </button>
                <button type="button" id="show-selected"
                    class="px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors">
                    仅显示精选
                </button>
            </div>
        </div>

        <!-- 论文列表 -->
        <div id="papers-container" class="grid grid-cols-1 gap-4">
            
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge; icons are decorative -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19325v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>基于多语言大语言模型的生成式查询扩展用于跨语言信息检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Card body; hidden by the .collapsed-level-1 rule in the page stylesheet -->
    <div class="paper-details">
        <!-- Original English title: lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Generative Query Expansion with Multilingual LLMs for Cross-Lingual Information Retrieval
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Olivia Macmillan-Scott, Roksana Goworek, Eda B. Özyiğit
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究跨语言信息检索中的查询扩展问题，核心思想是利用多语言大语言模型生成伪文档来扩展查询，通过语义增强和长度匹配来提升检索效果。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接研究LLM在搜索领域的查询扩展应用，探索多语言LLM的生成式方法，与搜索和LLM应用焦点高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags; pipe separators are purely visual -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:18:25">2025-11-24 17:18:25</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19325v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19325v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract in a native disclosure widget; abstract text is English -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Query expansion is the reformulation of a user query by adding semantically related information, and is an essential component of monolingual and cross-lingual information retrieval used to ensure that relevant documents are not missed. Recently, multilingual large language models (mLLMs) have shifted query expansion from semantic augmentation with synonyms and related words to pseudo-document generation. Pseudo-documents both introduce additional relevant terms and bridge the gap between short queries and long documents, which is particularly beneficial in dense retrieval. This study evaluates recent mLLMs and fine-tuned variants across several generative expansion strategies to identify factors that drive cross-lingual retrieval performance. Results show that query length largely determines which prompting technique is effective, and that more elaborate prompts often do not yield further gains. Substantial linguistic disparities persist: cross-lingual query expansion can produce the largest improvements for languages with the weakest baselines, yet retrieval is especially poor between languages written in different scripts. Fine-tuning is found to lead to performance gains only when the training and test data are of similar format. These outcomes underline the need for more balanced multilingual and cross-lingual training and evaluation resources.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge; icons are decorative -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19324v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>什么驱动跨语言排序？基于多语言语言模型的检索方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Card body; hidden by the .collapsed-level-1 rule in the page stylesheet -->
    <div class="paper-details">
        <!-- Original English title: lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            What Drives Cross-lingual Ranking? Retrieval Approaches with Multilingual Language Models
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Roksana Goworek, Olivia Macmillan-Scott, Eda B. Özyiğit
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究跨语言信息检索的核心驱动因素，核心方法是系统评估文档翻译、多语言密集检索、对比学习和交叉编码器重排四种干预策略，发现基于语义多语言嵌入和针对性学习对齐的方法优于翻译流程。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文系统评估多语言检索方法在跨语言搜索中的表现，直接涉及搜索领域的核心算法改进和多语言语义嵌入技术，与LLM在搜索应用高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags; pipe separators are purely visual -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:17:40">2025-11-24 17:17:40</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19324v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19324v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract in a native disclosure widget; abstract text is English -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Cross-lingual information retrieval (CLIR) enables access to multilingual knowledge but remains challenging due to disparities in resources, scripts, and weak cross-lingual semantic alignment in embedding models. Existing pipelines often rely on translation and monolingual retrieval heuristics, which add computational overhead and noise, degrading performance. This work systematically evaluates four intervention types, namely document translation, multilingual dense retrieval with pretrained encoders, contrastive learning at word, phrase, and query-document levels, and cross-encoder re-ranking, across three benchmark datasets. We find that dense retrieval models trained specifically for CLIR consistently outperform lexical matching methods and derive little benefit from document translation. Contrastive learning mitigates language biases and yields substantial improvements for encoders with weak initial alignment, and re-ranking can be effective, but depends on the quality of the cross-encoder training data. Although high-resource languages still dominate overall performance, gains over lexical and document-translated baselines are most pronounced for low-resource and cross-script pairs. These findings indicate that cross-lingual search systems should prioritise semantic multilingual embeddings and targeted learning-based alignment over translation-based pipelines, particularly for cross-script and under-resourced languages.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge; icons are decorative -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18997v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>面向短视频推荐权衡优化的异构多处理提升建模
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Card body; hidden by the .collapsed-level-1 rule in the page stylesheet -->
    <div class="paper-details">
        <!-- Original English title: lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Heterogeneous Multi-treatment Uplift Modeling for Trade-off Optimization in Short-Video Recommendation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chenhao Zhai, Chang Meng, Xueliang Wang, Shuchang Liu, Xiaolong Hu, Shisong Tang...
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究短视频推荐中多策略冲突的权衡优化问题，核心思想是通过离线混合提升建模捕获策略的协同与个体效应，结合在线动态决策实现个性化响应权重分配。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对推荐系统中的多目标权衡优化问题，提出了异构多处理提升建模框架，与推荐系统核心领域进展高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags; pipe separators are purely visual -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:22:46">2025-11-24 11:22:46</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18997v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18997v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract in a native disclosure widget; abstract text is English -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid proliferation of short videos on social media platforms presents unique challenges and opportunities for recommendation systems. Users exhibit diverse preferences, and the responses resulting from different strategies often conflict with one another, potentially exhibiting inverse correlations between metrics such as watch time and video view counts. Existing uplift models face limitations in handling the heterogeneous multi-treatment scenarios of short-video recommendations, often failing to effectively capture both the synergistic and individual causal effects of different strategies. Furthermore, traditional fixed-weight approaches for balancing these responses lack personalization and can result in biased decision-making. To address these issues, we propose a novel Heterogeneous Multi-treatment Uplift Modeling (HMUM) framework for trade-off optimization in short-video recommendations. HMUM comprises an Offline Hybrid Uplift Modeling (HUM) module, which captures the synergistic and individual effects of multiple strategies, and an Online Dynamic Decision-Making (DDM) module, which estimates the value weights of different user responses in real-time for personalized decision-making. Evaluated on two public datasets, an industrial dataset, and through online A/B experiments on the Kuaishou platform, our model demonstrated superior offline performance and significant improvements in key metrics. It is now fully deployed on the platform, benefiting hundreds of millions of users.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge; icons are decorative -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18805v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>STORE：用于扩展排序模型的语义分词、正交旋转与高效注意力机制
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Card body; hidden by the .collapsed-level-1 rule in the page stylesheet -->
    <div class="paper-details">
        <!-- Original English title: lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            STORE: Semantic Tokenization, Orthogonal Rotation and Efficient Attention for Scaling Up Ranking Models
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yi Xu, Chaofan Fan, Jinxin Hu, Yu Zhang, Zeng Xiaoyi, Jing Zhang
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究推荐系统中高基数异构稀疏特征带来的表示瓶颈和计算瓶颈问题，核心方法是通过语义分词处理特征异质性、正交旋转优化特征交互、高效注意力筛选重要令牌来构建可扩展的排序框架。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对推荐系统的核心瓶颈问题，提出语义分词、正交旋转和高效注意力三大创新，在模型架构和计算效率方面都有重要突破。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags; pipe separators are purely visual -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T06:20:02">2025-11-24 06:20:02</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18805v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18805v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract in a native disclosure widget; abstract text is English and reproduced verbatim -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Ranking models have become an important part of modern personalized recommendation systems. However, significant challenges persist in handling high-cardinality, heterogeneous, and sparse feature spaces, particularly regarding model scalability and efficiency. We identify two key bottlenecks: (i) Representation Bottleneck: Driven by the high cardinality and dynamic nature of features, model capacity is forced into sparse-activated embedding layers, leading to low-rank representations. This, in turn, triggers phenomena like "One-Epoch" and "Interaction-Collapse," ultimately hindering model scalability.(ii) Computational Bottleneck: Integrating all heterogeneous features into a unified model triggers an explosion in the number of feature tokens, rendering traditional attention mechanisms computationally demanding and susceptible to attention dispersion. To dismantle these barriers, we introduce STORE, a unified and scalable token-based ranking framework built upon three core innovations: (1) Semantic Tokenization fundamentally tackles feature heterogeneity and sparsity by decomposing high-cardinality sparse features into a compact set of stable semantic tokens; and (2) Orthogonal Rotation Transformation is employed to rotate the subspace spanned by low-cardinality static features, which facilitates more efficient and effective feature interactions; and (3) Efficient attention that filters low-contributing tokens to improve computional efficiency while preserving model accuracy. Across extensive offline experiments and online A/B tests, our framework consistently improves prediction accuracy(online CTR by 2.71%, AUC by 1.195%) and training effeciency (1.84 throughput).
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge; icons are decorative -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18740v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>基于自适应偏好优化的多模态大语言模型在序列推荐中的应用
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Card body; hidden by the .collapsed-level-1 rule in the page stylesheet -->
    <div class="paper-details">
        <!-- Original English title: lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Multimodal Large Language Models with Adaptive Preference Optimization for Sequential Recommendation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yu Wang, Yonghui Yang, Le Wu, Yi Zhang, Richang Hong
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究多模态大语言模型在序列推荐中的偏好优化问题，核心思想是通过动态调整样本权重和引入高斯扰动分布优化，解决样本难度不平衡和跨模态语义偏差两大挑战。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对多模态推荐系统的核心挑战，提出自适应偏好优化方法，完美契合LLM在推荐系统的应用、Transformer架构优化和异构数据统一建模等多个关注领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags; pipe separators are purely visual -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T04:10:46">2025-11-24 04:10:46</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18740v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18740v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract in a native disclosure widget; abstract text is English -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent advances in Large Language Models (LLMs) have opened new avenues for sequential recommendation by enabling natural language reasoning over user behavior sequences. A common approach formulates recommendation as a language modeling task, where interaction histories are transformed into prompts and user preferences are learned via supervised fine-tuning. However, these methods operate solely in the textual modality and often miss users' fine-grained interests, especially when shaped by rich visual signals such as product images or movie posters. Multimodal Large Language Models (MLLMs) offer a promising alternative by aligning text and vision in a shared semantic space. A prevalent training paradigm applies Supervised Fine-Tuning (SFT) followed by Direct Preference Optimization (DPO) to model user preferences. Yet, two core challenges remain: 1) Imbalanced sample hardness, where random negative sampling causes overfitting on easy examples and under-training on hard ones; 2) Cross-modal semantic bias, where the fixed reference model in DPO prevents the policy model from correcting modality misalignments--especially over long sequences. To address these issues, we propose a Multimodal LLM framework that integrates Hardness-aware and Noise-regularized preference optimization for Recommendation (HaNoRec). Specifically, HaNoRec dynamically adjusts optimization weights based on both the estimated hardness of each training sample and the policy model's real-time responsiveness, prioritizing harder examples. It further introduces Gaussian-perturbed distribution optimization on output logits to enhance cross-modal semantic consistency and reduce modality bias inherited from the reference model.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge; icons are decorative -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18717v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>何时推荐与推荐什么：主动序列推荐中时机与内容的联合建模
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Card body; hidden by the .collapsed-level-1 rule in the page stylesheet -->
    <div class="paper-details">
        <!-- Original English title: lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            When and What to Recommend: Joint Modeling of Timing and Content for Active Sequential Recommendation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jin Chai, Xiaoxiao Ma, Jian Yang, Jia Wu
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究主动序列推荐中如何预测用户兴趣时间和推荐内容的联合建模问题，核心方法是提出扩散框架通过联合目标对齐时间预测和内容生成。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接解决推荐系统中的主动推荐核心问题，提出联合建模时序和内容的扩散框架，对序列推荐和时序建模有重要贡献。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags; pipe separators are purely visual -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T03:16:10">2025-11-24 03:16:10</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18717v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18717v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract in a native disclosure widget; abstract text is English -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Sequential recommendation models user preferences to predict the next target item. Most existing work is passive, where the system responds only when users open the application, missing chances after closure. We investigate active recommendation, which predicts the next interaction time and actively delivers items. Two challenges: accurately estimating the Time of Interest (ToI) and generating Item of Interest (IoI) conditioned on the predicted ToI. We propose PASRec, a diffusion-based framework that aligns ToI and IoI via a joint objective. Experiments on five benchmarks show superiority over eight state-of-the-art baselines under leave-one-out and temporal splits.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.19417v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19417v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>成为我的眼睛：通过多智能体协作将大语言模型扩展到新模态
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Be My Eyes: Extending Large Language Models to New Modalities Through Multi-Agent Collaboration
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>James Y. Huang, Sheng Zhang, Qianchu Liu, Guanghui Qin, Tinghui Zhu, Tristan Nau...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何低成本扩展LLM到新模态进行多模态推理。核心思想是构建多智能体框架，让高效VLM作为感知器与强大LLM作为推理器通过对话协作，结合感知与推理优势避免训练大型多模态模型。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过多智能体协作将LLM扩展到新模态，直接涉及LLM技术应用和异构数据处理，与VLM类比思想高度契合。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:55:16">2025-11-24 18:55:16</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19417v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19417v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) have demonstrated remarkable capabilities in challenging, knowledge-intensive reasoning tasks. However, extending LLMs to perceive and reason over a new modality (e.g., vision), often requires costly development of large-scale vision language models (VLMs) with LLMs as backbones. Smaller VLMs are more efficient and adaptable but often lack the broad knowledge and reasoning capabilities of frontier LLMs. In this work, we propose BeMyEyes, a modular, multi-agent framework for extending LLMs to multimodal reasoning by orchestrating collaboration between efficient, adaptable VLMs as perceivers and powerful LLMs as reasoners through conversations. We then introduce a data synthesis and supervised fine-tuning pipeline to train the perceiver agent to effectively collaborate with the reasoner agent. By combining the complementary strengths of perception and reasoning agents, BeMyEyes avoids the need for training large-scale multimodal models, preserves the generalization and reasoning capabilities of LLMs, and allows flexible extension to new domains and modalities. Experiments show that our framework unlocks the multimodal reasoning capabilities for LLMs, enabling a lightweight and fully open-source solution, i.e. equipping text-only DeepSeek-R1 with Qwen2.5-VL-7B perceiver, to outperform large-scale proprietary VLMs such as GPT-4o on a wide range of knowledge-intensive multimodal tasks. These results demonstrate the effectiveness, modularity, and scalability of our multi-agent approach for building future multimodal reasoning systems.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.19333v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19333v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>学习推理：使用GPT-OSS或DeepSeek R1推理轨迹训练大语言模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Learning to Reason: Training LLMs with GPT-OSS or DeepSeek R1 Reasoning Traces
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shaltiel Shmidman, Asher Fredman, Oleg Sudakov, Meriem Bendris
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何利用前沿推理大模型生成高质量推理轨迹来训练中小型语言模型的推理能力。核心方法是通过DeepSeek-R1和GPT-OSS等推理模型的中间推理轨迹作为监督数据，对中小模型进行后训练以传授复杂推理技能。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接研究利用前沿推理LLM生成监督数据来训练中小模型推理能力，属于LLM技术直接应用于模型训练的核心领域。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:26:58">2025-11-24 17:26:58</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19333v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19333v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Test-time scaling, which leverages additional computation during inference to improve model accuracy, has enabled a new class of Large Language Models (LLMs) that are able to reason through complex problems by understanding the goal, turning this goal into a plan, working through intermediate steps, and checking their own work before answering. Frontier large language models with reasoning capabilities, such as DeepSeek-R1 and OpenAI's gpt-oss, follow the same procedure when solving complex problems by generating intermediate reasoning traces before giving the final answer. Today, these models are being increasingly used to generate reasoning traces that serve as high-quality supervised data for post-training of small and medium-sized language models to teach reasoning capabilities without requiring expensive human curation. In this work, we compare the performance of medium-sized LLMs on Math problems after post-training on two kinds of reasoning traces. We compare the impact of reasoning traces generated by DeepSeek-R1 and gpt-oss LLMs in terms of accuracy and inference efficiency.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.19131v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19131v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>通过基于梯度的表示优化在基础大语言模型中引发思维链推理
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Eliciting Chain-of-Thought in Base LLMs via Gradient-Based Representation Optimization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zijian Wang, Yanxiang Ma, Chang Xu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何激发基础大语言模型的链式思维推理能力。核心方法是基于概率条件生成的隐藏状态优化框架，通过平衡似然和先验正则化来引导模型产生推理轨迹同时保持语言连贯性。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过梯度优化方法提升基础LLMs的推理能力，直接属于核心LLM技术进展，对推荐和搜索系统的复杂推理任务有重要应用价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T13:55:57">2025-11-24 13:55:57</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19131v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19131v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (PDF line-break hyphenation removed). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Chain-of-Thought (CoT) reasoning is a critical capability for large language models (LLMs), enabling them to tackle complex multi-step tasks. While base LLMs, pre-trained on general text corpora, often struggle with reasoning due to a lack of specialized training, recent studies reveal their latent reasoning potential tied to hidden states. However, existing hidden state manipulation methods, such as linear activation steering, suffer from limitations due to their rigid and unconstrained nature, often leading to distribution shifts and degraded text quality. In this work, we propose a novel approach for eliciting CoT reasoning from base LLMs through hidden state manipulation grounded in probabilistic conditional generation. By reformulating the challenge as an optimization problem with a balanced likelihood and prior regularization framework, our method guides hidden states toward reasoning-oriented trajectories while preserving linguistic coherence. Extensive evaluations across mathematical, commonsense, and logical reasoning benchmarks demonstrate that our approach consistently outperforms existing steering methods, offering a theoretically principled and effective solution for enhancing reasoning capabilities in base LLMs.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.18936v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18936v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>SWAN：通过免解压缩KV缓存压缩实现稀疏筛选注意力以减少推理内存
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            SWAN: Sparse Winnowed Attention for Reduced Inference Memory via Decompression-Free KV-Cache Compression
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Santhosh G S, Saurav Prakash, Balaraman Ravindran
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究LLM自回归推理中KV缓存内存占用过大的问题，核心思想是使用离线正交矩阵对KV缓存进行旋转和剪枝，无需解压缩即可直接用于注意力计算，实现运行时可调的动态内存优化。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM推理效率瓶颈，提出无需解压缩的KV缓存压缩方法，属于Transformer架构效率优化的核心进展，对搜索推荐系统的高效部署具有直接应用价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:41:24">2025-11-24 09:41:24</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18936v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18936v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) face a significant bottleneck during autoregressive inference due to the massive memory footprint of the Key-Value (KV) cache. Existing compression techniques like token eviction, quantization, or other low-rank methods often risk information loss, have fixed limits, or introduce significant computational overhead from explicit decompression steps. In this work, we introduce SWAN, a novel, fine-tuning-free framework that eliminates this overhead. Our method uses an offline orthogonal matrix to rotate and prune the KV-cache, which is then used directly in the attention computation without any reconstruction. Our extensive experiments demonstrate that SWAN, augmented with a small dense buffer, offers a robust trade-off, maintaining performance close to the uncompressed baseline even at aggressive 50-60% memory savings per-token on KV-cache. A key advantage is its runtime-tunable compression level, allowing operators to dynamically adjust the memory footprint, a flexibility absent in methods requiring fixed offline configurations. This combination of a decompression-free design, high performance under compression, and adaptability makes SWAN a practical and efficient solution for serving LLMs with long contexts.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.18808v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18808v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>HyperbolicRAG：通过双曲表示增强检索增强生成
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            HyperbolicRAG: Enhancing Retrieval-Augmented Generation with Hyperbolic Representations
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Cao Linxiao, Wang Ruitao, Li Jindong, Zhou Zhipeng, Yang Menglin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究图基检索增强生成系统在表示复杂知识图谱中层次抽象关系时的局限性。核心思想是将双曲几何引入图基RAG，通过共享庞加莱流形嵌入实现语义相似性与层次包含的对齐，并融合欧氏和双曲空间的检索信号。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接改进检索增强生成系统，属于核心领域进展，其双曲几何方法对处理层次化知识结构具有重要价值，可应用于搜索和推荐系统的复杂关系建模。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T06:27:58">2025-11-24 06:27:58</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18808v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18808v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Retrieval-augmented generation (RAG) enables large language models (LLMs) to access external knowledge, helping mitigate hallucinations and enhance domain-specific expertise. Graph-based RAG enhances structural reasoning by introducing explicit relational organization that enables information propagation across semantically connected text units. However, these methods typically rely on Euclidean embeddings that capture semantic similarity but lack a geometric notion of hierarchical depth, limiting their ability to represent abstraction relationships inherent in complex knowledge graphs. To capture both fine-grained semantics and global hierarchy, we propose HyperbolicRAG, a retrieval framework that integrates hyperbolic geometry into graph-based RAG. HyperbolicRAG introduces three key designs: (1) a depth-aware representation learner that embeds nodes within a shared Poincare manifold to align semantic similarity with hierarchical containment, (2) an unsupervised contrastive regularization that enforces geometric consistency across abstraction levels, and (3) a mutual-ranking fusion mechanism that jointly exploits retrieval signals from Euclidean and hyperbolic spaces, emphasizing cross-space agreement during inference. Extensive experiments across multiple QA benchmarks demonstrate that HyperbolicRAG outperforms competitive baselines, including both standard RAG and graph-augmented baselines.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.18659v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18659v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>CLaRa：通过连续潜在推理桥接检索与生成
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            CLaRa: Bridging Retrieval and Generation with Continuous Latent Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jie He, Richard He Bai, Sinead Williamson, Jeff Z. Pan, Navdeep Jaitly, Yizhe Zh...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究检索增强生成中检索与生成模块分离优化的问题，核心思想是通过连续潜在空间统一表示和端到端联合训练，将检索相关性与生成质量对齐。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文针对检索增强生成中的核心问题提出统一优化框架，直接关联检索系统与LLM应用，在架构创新和联合优化方法上具有重要价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T00:11:14">2025-11-24 00:11:14</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18659v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18659v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Retrieval-augmented generation (RAG) enhances large language models (LLMs) with external knowledge but still suffers from long contexts and disjoint retrieval-generation optimization. In this work, we propose CLaRa (Continuous Latent Reasoning), a unified framework that performs embedding-based compression and joint optimization in a shared continuous space. To obtain semantically rich and retrievable compressed vectors, we introduce SCP, a key-preserving data synthesis framework using QA and paraphrase supervision. CLaRa then trains the reranker and generator end-to-end via a single language modeling loss, with gradients flowing through both modules using a differentiable top-k estimator. Theoretically, this unified optimization aligns retrieval relevance with answer quality. Experiments across multiple QA benchmarks show that CLaRa achieves state-of-the-art compression and reranking performance, often surpassing text-based fine-tuned baselines.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.19349v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19349v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>重新审视HyDE的反馈模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Revisiting Feedback Models for HyDE
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nour Jedidi, Jimmy Lin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何优化基于LLM的伪相关反馈方法HyDE，核心思想是将传统反馈模型（如Rocchio）与LLM生成的假设文档相结合，通过更智能的术语提取和加权来改进查询扩展。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接研究LLM在搜索系统中的应用，通过改进HyDE方法提升伪相关反馈效果，与搜索和LLM应用领域高度相关。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:50:18">2025-11-24 17:50:18</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19349v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19349v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent approaches that leverage large language models (LLMs) for pseudo-relevance feedback (PRF) have generally not utilized well-established feedback models like Rocchio and RM3 when expanding queries for sparse retrievers like BM25. Instead, they often opt for a simple string concatenation of the query and LLM-generated expansion content. But is this optimal? To answer this question, we revisit and systematically evaluate traditional feedback models in the context of HyDE, a popular method that enriches query representations with LLM-generated hypothetical answer documents. Our experiments show that HyDE's effectiveness can be substantially improved when leveraging feedback algorithms such as Rocchio to extract and weight expansion terms, providing a simple way to further enhance the accuracy of LLM-based PRF methods.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.19176v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19176v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>从原始特征到有效嵌入：用于多模态食谱推荐的三阶段方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            From Raw Features to Effective Embeddings: A Three-Stage Approach for Multimodal Recipe Recommendation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jeeho Shin, Kyungho Kim, Kijung Shin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何有效利用多模态特征进行食谱推荐；核心方法是提出三阶段框架，通过基础模型增强内容特征、消息传播增强关系特征、对比学习增强学习特征，将原始特征逐步精炼为有效嵌入。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出多模态特征增强的三阶段框架，直接涉及推荐系统中的核心特征工程和嵌入学习，与RecSys核心进展和LLM应用高度相关。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T14:37:22">2025-11-24 14:37:22</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19176v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19176v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recipe recommendation has become an essential task in web-based food platforms. A central challenge is effectively leveraging rich multimodal features beyond user-recipe interactions. Our analysis shows that even simple uses of multimodal signals yield competitive performance, suggesting that systematic enhancement of these signals is highly promising. We propose TESMR, a 3-stage framework for recipe recommendation that progressively refines raw multimodal features into effective embeddings through: (1) content-based enhancement using foundation models with multimodal comprehension, (2) relation-based enhancement via message propagation over user-recipe interactions, and (3) learning-based enhancement through contrastive learning with learnable embeddings. Experiments on two real-world datasets show that TESMR outperforms existing methods, achieving 7-15% higher Recall@10.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2511.19314v1. Icons are decorative, hence aria-hidden; English text carries lang="en" because the document language is zh-CN. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19314v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>PRInTS：面向长视野信息寻求的奖励建模
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; hidden when an ancestor has the collapsed-level-1 class (see the page's inline styles). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            PRInTS: Reward Modeling for Long-Horizon Information Seeking
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jaewoo Lee, Archiki Prasad, Justin Chih-Yao Chen, Zaid Khan, Elias Stengel-Eskin...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究长视野信息搜索任务中智能体多步决策的优化问题；核心方法是开发生成式过程奖励模型，通过多维度步骤质量评估和动态上下文压缩来指导搜索轨迹。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出生成式过程奖励模型PRInTS，通过密集评分和轨迹摘要解决长视野信息搜索任务，直接应用于搜索领域的智能体决策优化。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:09:43">2025-11-24 17:09:43</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19314v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19314v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Information-seeking is a core capability for AI agents, requiring them to gather and reason over tool-generated information across long trajectories. However, such multi-step information-seeking tasks remain challenging for agents backed by language models. While process reward models (PRMs) can guide agents by ranking candidate steps at test-time, existing PRMs, designed for short reasoning with binary judgment, cannot capture richer dimensions of information-seeking steps, such as tool interactions and reasoning over tool outputs, nor handle the rapidly growing context in long-horizon tasks. To address these limitations, we introduce PRInTS, a generative PRM trained with dual capabilities: (1) dense scoring based on the PRM's reasoning across multiple step quality dimensions (e.g., interpretation of tool outputs, tool call informativeness) and (2) trajectory summarization that compresses the growing context while preserving essential information for step evaluation. Extensive evaluations across FRAMES, GAIA (levels 1-3), and WebWalkerQA (easy-hard) benchmarks on multiple models, along with ablations, reveal that best-of-n sampling with PRInTS enhances information-seeking abilities of open-source models as well as specialized agents, matching or surpassing the performance of frontier models with a much smaller backbone agent and outperforming other strong reward modeling baselines.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: linked title on the left, score badge on the right. Icons are decorative. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19269v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>CDLM：用于更快采样的扩散一致性语言模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; the page rule ".collapsed-level-1 .paper-details" hides it under a collapsed ancestor. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CDLM: Consistency Diffusion Language Models For Faster Sampling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Minseo Kim, Chenfeng Xu, Coleman Hooper, Harman Singh, Ben Athiwaratkun, Ce Zhan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究扩散语言模型推理速度慢的问题，核心思想是结合一致性建模实现多令牌并行生成，并通过块级因果注意力掩码实现KV缓存兼容性。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过一致性建模和注意力机制优化直接解决LLM推理效率瓶颈，对搜索推荐系统的实时响应和部署成本有重要应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 16:21:25
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19269v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19269v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Diffusion Language Models (DLMs) offer a promising parallel generation paradigm but suffer from slow inference due to numerous refinement steps and the inability to use standard KV caching. We introduce CDLM (Consistency Diffusion Language Models), a training-based acceleration method that simultaneously tackles both bottlenecks. CDLM integrates consistency modeling to drastically reduce the number of required sampling steps by enabling multi-token finalization. Furthermore, we enforce a block-wise causal attention mask during fine-tuning, making the model fully compatible with KV caching. Experiments show CDLM achieves 3.6x-14.5x lower latency while maintaining competitive accuracy on math and coding tasks. The full training and evaluation code is available at https://github.com/SqueezeAILab/CDLM.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: linked title on the left, score badge on the right. Icons are decorative. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18934v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>骨架至关重要：面向文本到查询的动态数据增强
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; the page rule ".collapsed-level-1 .paper-details" hides it under a collapsed ancestor. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Skeletons Matter: Dynamic Data Augmentation for Text-to-Query
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuchen Ji, Bo Xu, Jie Shi, Jiaqing Liang, Deqing Yang, Yu Mao, Hai Chen, Yanghua...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究多查询语言下语义解析的泛化问题，核心思想是将查询骨架作为统一优化目标，通过诊断模型在骨架处理上的弱点进行针对性数据增强。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出统一文本到查询任务范式，通过动态数据增强解决LLM在语义解析中的泛化问题，直接适用于搜索领域的查询理解与生成。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 09:39:03
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18934v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18934v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.DB</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The task of translating natural language questions into query languages has long been a central focus in semantic parsing. Recent advancements in Large Language Models (LLMs) have significantly accelerated progress in this field. However, existing studies typically focus on a single query language, resulting in methods with limited generalizability across different languages. In this paper, we formally define the Text-to-Query task paradigm, unifying semantic parsing tasks across various query languages. We identify query skeletons as a shared optimization target of Text-to-Query tasks, and propose a general dynamic data augmentation framework that explicitly diagnoses model-specific weaknesses in handling these skeletons to synthesize targeted training data. Experiments on four Text-to-Query benchmarks demonstrate that our method achieves state-of-the-art performance using only a small amount of synthesized data, highlighting the efficiency and generality of our approach and laying a solid foundation for unified research on Text-to-Query tasks. We release our code at https://github.com/jjjycaptain/Skeletron.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: linked title on the left, score badge on the right. Icons are decorative. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18931v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>查阅一下：分析现代大语言模型的内部网络搜索能力
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; the page rule ".collapsed-level-1 .paper-details" hides it under a collapsed ancestor. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Look It Up: Analysing Internal Web Search Capabilities of Modern LLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sahil Kale
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究现代LLM在何时需要调用内部网络搜索以及搜索效果如何的核心问题，核心方法是构建静态和动态问题基准来评估LLM的搜索必要性判断、查询制定能力及置信度校准。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接评估LLM内部搜索能力的使用必要性、时机选择和效果，对搜索系统优化和LLM实际应用具有重要参考价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 09:37:43
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18931v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18931v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Modern large language models integrate web search to provide real-time answers, yet it remains unclear whether they are efficiently calibrated to use search when it is actually needed. We introduce a benchmark evaluating both the necessity and effectiveness of web access across commercial models with no access to internal states or parameters. The dataset includes a static split of 783 temporally anchored questions answerable from pre-cutoff knowledge, aimed at testing whether models invoke search based on low internal confidence, and a dynamic split of 288 post-cutoff queries designed to test whether models recognise when search is required and retrieve updated information. Web access substantially improves static accuracy for GPT-5-mini and Claude Haiku 4.5, though confidence calibration worsens. On dynamic queries, both models frequently invoke search yet remain below 70 percent accuracy due to weak query formulation. Costs per accuracy-improving call remain low, but returns diminish once initial retrieval fails. Selective invocation helps, but models become overconfident and inconsistent after search. Overall, built-in web search meaningfully improves factual accuracy and can be invoked selectively, yet models remain overconfident, skip retrieval when it is essential, and falter once initial search queries underperform. Taken together, internal web search works better as a good low-latency verification layer than a reliable analytical tool, with clear room for improvement.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: linked title on the left, score badge on the right. Icons are decorative. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19278v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>ReMatch：通过匹配增强多模态检索中的表示能力
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; the page rule ".collapsed-level-1 .paper-details" hides it under a collapsed ancestor. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ReMatch: Boosting Representation through Matching for Multimodal Retrieval
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qianying Liu, Xiao Liang, Zhiqiang Zhang, Yibo Chen, Xu Tang, Zhongfei Qing, Fen...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何提升多模态检索中的表示质量。核心方法是引入生成式匹配阶段，利用同一MLLM从多视图输入（包括原始数据和投影嵌入）自回归判断相关性，提供实例级判别监督，并结合多可学习令牌生成细粒度上下文嵌入。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过匹配机制增强多模态表示学习，直接关联推荐系统中的检索任务；其多视图输入和生成式匹配方法对处理异构数据具有启发性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 16:28:49
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19278v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19278v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present ReMatch, a framework that leverages the generative strength of MLLMs for multimodal retrieval. Previous approaches treated an MLLM as a simple encoder, ignoring its generative nature, and under-utilising its compositional reasoning and world knowledge. We instead train the embedding MLLM end-to-end with a chat-style generative matching stage. The matching stage uses the same MLLM to autoregressively decide relevance from multi-view inputs, including both raw data and its own projected embeddings for each query and document. It provides instance-wise discrimination supervision that complements a standard contrastive loss, offering stronger gradients on hard negatives and preserving the compositional strengths of the original MLLM. To obtain semantically richer multimodal embeddings, we use multiple learnable tokens to augment each input, generating fine-grained contextual, mutually orthogonal embeddings with low inference cost. Leveraging our established high-performance baseline,we assemble the ideas mentioned above into a powerful training recipe and achieve a new state-of-the-art on the Massive Multimodal Embedding Benchmark (MMEB). Our experiments show particularly strong zero-shot generalization results on five datasets, highlighting the robustness and transferability of ReMatch.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: linked title on the left, score badge on the right. Icons are decorative. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19145v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>ABM-LoRA：基于激活边界匹配的低秩自适应快速收敛方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail section; the page rule ".collapsed-level-1 .paper-details" hides it under a collapsed ancestor. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ABM-LoRA: Activation Boundary Matching for Fast Convergence in Low-Rank Adaptation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dongha Lee, Jinhee Park, Minjun Kim, Junseok Kwon
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LoRA微调方法收敛速度慢的问题，核心思想是通过对齐预训练模型和适配器的激活边界来优化初始化策略，从而减少梯度信息损失并加速收敛。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的LoRA初始化优化方法直接提升大模型微调效率，对搜索推荐系统中的模型快速适配具有重要价值，属于Transformer架构效率优化范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 14:09:42
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19145v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19145v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We propose Activation Boundary Matching for Low-Rank Adaptation (ABM-LoRA), a principled initialization strategy that substantially accelerates the convergence of low-rank adapters. While LoRA offers high parameter efficiency, its random initialization restricts gradient updates to a mismatched tangent space, causing significant information loss and hindering early convergence. Our ABM-LoRA addresses this by aligning the adapter's activation boundaries with those of the pretrained model before downstream training, thereby maximizing the projection of full-parameter gradients into the adapter subspace. This alignment sharply reduces information loss at initialization, yields a lower starting loss, and accelerates convergence. We demonstrate ABM-LoRA's effectiveness across diverse architectures and tasks: language understanding (T5-Base on GLUE), dialogue generation (LLaMA2-7B on WizardLM), and vision recognition (ViT-B/16 on VTAB-1K). On VTAB-1K, it achieves the highest accuracy among all methods, with strong gains on structured reasoning tasks requiring geometric understanding.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: linked title on the left, score badge on the right. The badge icon is decorative. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19078v1" target="_blank" rel="noopener noreferrer">
                GraphMind：基于动态图神经网络的大语言模型推理定理选择与结论生成框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <!-- Detail section; hidden by the page rule ".collapsed-level-1 .paper-details" while this card keeps that class. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            GraphMind: Theorem Selection and Conclusion Generation Framework with Dynamic GNN for LLM Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yutong Li, Yitian Zhou, Xudong Wang, GuoChen, Caiyan Qin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的动态图神经网络框架虽然主要面向定理证明，但其核心的图结构推理和动态GNN技术可直接应用于推荐系统中的用户-物品图建模和动态关系学习。这种结构化推理能力对于搜索中的查询理解、广告中的用户意图建模都具有重要价值，属于'Enabling Transformer Tech'中的架构创新。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 13:18:21
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19078v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19078v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) have demonstrated impressive capabilities in natural language understanding and generation, including multi-step reasoning such as mathematical proving. However, existing approaches often lack an explicit and dynamic mechanism to structurally represent and evolve intermediate reasoning states, which limits their ability to perform context-aware theorem selection and iterative conclusion generation. To address these challenges, we propose GraphMind, a novel dynamic graph-based framework that integrates the graph neural network (GNN) with LLMs to iteratively select theorems and generate intermediate conclusions for multi-step reasoning. Our method models the reasoning process as a heterogeneous evolving graph, where nodes represent conditions, theorems, and conclusions, while edges capture logical dependencies between nodes. By encoding the current reasoning state with GNN and leveraging semantic matching for theorem selection, our framework enables context-aware, interpretable, and structured reasoning in a closed-loop manner. Experiments on various question-answering (QA) datasets demonstrate that our proposed GraphMind method achieves consistent performance improvements and significantly outperforms existing baselines in multi-step reasoning, validating the effectiveness and generalizability of our approach.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: linked title on the left, score badge on the right. The badge icon is decorative. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19279v1" target="_blank" rel="noopener noreferrer">
                MapFormer：基于输入相关位置嵌入的认知地图自监督学习
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Detail section; hidden by the page rule ".collapsed-level-1 .paper-details" while this card keeps that class. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MapFormer: Self-Supervised Learning of Cognitive Maps with Input-Dependent Positional Embeddings
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Victor Rambaud, Salvador Mascarenhas, Yair Lakretz
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出输入相关位置嵌入和认知地图学习，属于Transformer架构效率和新注意力机制的进展。这种技术可以应用于推荐系统中用户行为序列建模，通过动态位置编码更好地捕捉用户兴趣的时空演变模式，或者用于搜索中的查询-文档位置关系建模。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 16:29:02
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19279v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19279v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                A cognitive map is an internal model which encodes the abstract relationships among entities in the world, giving humans and animals the flexibility to adapt to new situations, with a strong out-of-distribution (OOD) generalization that current AI systems still do not possess. To bridge this gap, we introduce MapFormers, new architectures based on Transformer models, which can learn cognitive maps from observational data and perform path integration in parallel, in a self-supervised manner. Cognitive maps are learned in the model by disentangling structural relationships in the inputs from their specific content, a property that can be achieved naturally by updating the positional encoding in Transformers with input-dependent matrices. We developed two variants of MapFormers that unify absolute and relative positional encoding to model episodic (EM) and working memory (WM), respectively. We tested MapFormers on several tasks, including a classic 2D navigation task, showing that our models can learn a cognitive map of the underlying space and generalize OOD (e.g., to longer sequences) with near-perfect performance, unlike current architectures. Together, these results demonstrate the superiority of models designed to learn a cognitive map, and the importance of introducing a structural bias for structure-content disentanglement, which can be achieved in Transformers with input-dependent positional encoding. MapFormers have broad applications in both neuroscience and AI, by explaining the neural mechanisms giving rise to cognitive maps, while allowing these relation models to be learned at scale.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: linked title on the left, score badge on the right. The badge icon is decorative. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18903v1" target="_blank" rel="noopener noreferrer">
                学习率衰减如何在基于课程学习的LLM预训练中浪费您的最佳数据
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Detail section; hidden by the page rule ".collapsed-level-1 .paper-details" while this card keeps that class. -->
    <div class="paper-details">
        <!-- English title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            How Learning Rate Decay Wastes Your Best Data in Curriculum-Based LLM Pretraining
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kairong Luo, Zhenbo Sun, Haodong Wen, Xinyu Shi, Jiarui Cui, Chenyi Dang, Kaifen...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及LLM预训练中的学习率调度和课程学习策略，这属于'Enabling LLM Tech'范畴。在RecSys/Search/Ads中，优化的预训练策略可以提升模型在下游任务（如用户行为预测、内容理解）的性能和收敛效率。虽然不直接应用，但为构建更高效的推荐和搜索模型提供了重要的训练方法学基础。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The "|" separators are visual only. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 09:03:49
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18903v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18903v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Due to the scarcity of high-quality data, large language models (LLMs) are often trained on mixtures of data with varying quality levels, even after sophisticated data curation. A natural approach to better leverage high-quality data is curriculum-based pretraining, where the model is trained on data sorted in ascending order of quality as determined by a quality metric. However, prior studies have reported limited improvements from such curriculum-based pretraining strategies. This work identifies a critical factor constraining these methods: the incompatibility between the ascending data quality order and the decaying learning rate (LR) schedule. We find that while curriculum-based training substantially outperforms random shuffling when using a constant LR, its advantage diminishes under standard LR decay schedules. Our experiments show this incompatibility can be mitigated by two simple strategies: (1) employing a more moderate LR decay schedule, where the final LR is only moderately smaller than the peak LR, and (2) replacing LR decay with model averaging, i.e., computing a weighted average of the final few checkpoints. By combining these strategies, we improve the average score on a suite of standard benchmarks by 1.64% over random shuffling, without additional data refinement. Validated on 1.5B-parameter models trained over 30B tokens with various data-quality metrics, our findings call for a re-evaluation of curriculum-based LLM pretraining and underscore the potential of co-designing data curricula with optimization methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19413v1" target="_blank" rel="noopener noreferrer">
                UniGame：将统一多模态模型转变为自身对抗者
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UniGame: Turning a Unified Multimodal Model Into Its Own Adversary
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhaolong Su, Wang Lu, Hao Chen, Sharon Li, Jindong Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态模型和对抗性方法，这与'异构数据的VLM类比'焦点相关，可将不同数据模态（如用户特征和序列）统一建模。对抗性训练可能用于增强推荐或搜索系统的鲁棒性，通过让模型与自身对抗来改进表示学习或处理对抗性输入。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T18:50:01">2025-11-24 18:50:01</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19413v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19413v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Unified Multimodal Models (UMMs) have shown impressive performance in both understanding and generation with a single architecture. However, UMMs still exhibit a fundamental inconsistency: understanding favors compact embeddings, whereas generation favors reconstruction-rich representations. This structural trade-off produces misaligned decision boundaries, degraded cross-modal coherence, and heightened vulnerability under distributional and adversarial shifts. In this paper, we present UniGame, a self-adversarial post-training framework that directly targets the inconsistencies. By applying a lightweight perturber at the shared token interface, UniGame enables the generation branch to actively seek and challenge fragile understanding, turning the model itself into its own adversary. Experiments demonstrate that UniGame significantly improves the consistency (+4.6%). Moreover, it also achieves substantial improvements in understanding (+3.6%), generation (+0.02), out-of-distribution and adversarial robustness (+4.8% and +6.2% on NaturalBench and AdVQA). The framework is architecture-agnostic, introduces less than 1% additional parameters, and is complementary to existing post-training methods. These results position adversarial self-play as a general and effective principle for enhancing the coherence, stability, and unified competence of future multimodal foundation models. The official code is available at: https://github.com/AIFrontierLab/UniGame
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18925v1" target="_blank" rel="noopener noreferrer">
                AttenDence：最大化测试时自适应的注意力置信度
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AttenDence: Maximizing Attention Confidence for Test Time Adaptation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yash Mali
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于注意力机制的测试时自适应优化，这属于Transformer架构效率改进的范畴。在推荐系统和搜索领域，这种技术可以应用于动态调整模型对用户行为序列或上下文特征的注意力分布，提高在线服务的稳定性和性能。通过最大化注意力置信度，可以增强模型在真实部署环境中的鲁棒性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T09:32:01">2025-11-24 09:32:01</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18925v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18925v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Test-time adaptation (TTA) enables models to adapt to distribution shifts at inference time. While entropy minimization over the output distribution has proven effective for TTA, transformers offer an additional unsupervised learning signal through their attention mechanisms. We propose minimizing the entropy of attention distributions from the CLS token to image patches as a novel TTA objective. This approach encourages the model to attend more confidently to relevant image regions under distribution shift and is effective even when only a single test image is available. We demonstrate that attention entropy minimization improves robustness across diverse corruption types while not hurting performance on clean data on a single sample stream of images at test time.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18875v1" target="_blank" rel="noopener noreferrer">
                用于快速准确多模态大语言模型推理的并行视觉令牌调度
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Parallel Vision Token Scheduling for Fast and Accurate Multimodal LMMs Inference
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wengyi Zhan, Mingbao Lin, Zhihang Lin, Rongrong Ji
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态大语言模型的高效推理技术，属于'赋能LLM技术'范畴。在搜索和推荐系统中，多模态内容（图像+文本）处理需要高效推理，该技术可加速多模态商品搜索、内容推荐等场景的响应速度，提升用户体验。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T08:29:36">2025-11-24 08:29:36</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18875v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18875v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.MM</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multimodal large language models (MLLMs) deliver impressive vision-language reasoning but suffer steep inference latency because self-attention scales quadratically with sequence length and thousands of visual tokens contributed by high-resolution images. Naively pruning less-informative visual tokens reduces this burden, yet indiscriminate removal can strip away contextual cues essential for background or fine-grained questions, undermining accuracy. In this paper, we present ParVTS (Parallel Vision Token Scheduling), a training-free scheduling framework that partitions visual tokens into subject and non-subject groups, processes them in parallel to transfer their semantics into question tokens, and discards the non-subject path mid-inference to reduce computation. This scheduling reduces computational complexity, requires no heuristics or additional modules, and is compatible with diverse existing MLLM architectures. Experiments across multiple MLLM backbones show that ParVTS prunes up to 88.9% of visual tokens with minimal performance drop, achieving 1.77x speedup and 70% FLOPs reduction.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19232v1" target="_blank" rel="noopener noreferrer">
                机器中的N400：精确定位因果语言模型检测语义违例的位置
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            In Machina N400: Pinpointing Where a Causal Language Model Detects Semantic Violations
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Christos-Nikolaos Zacharopoulos, Revekka Kyriakoglou
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究语言模型如何检测语义违例，这属于核心LLM技术的进展。在搜索和推荐系统中，理解语义一致性和检测异常对于改进查询理解、内容相关性评估和异常用户行为检测具有潜在应用价值。然而，该研究更偏向基础语言理解机制，与推荐/搜索/广告的直接应用连接不够紧密。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T15:43:56">2025-11-24 15:43:56</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19232v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19232v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                How and where does a transformer notice that a sentence has gone semantically off the rails? To explore this question, we evaluated the causal language model (phi-2) using a carefully curated corpus, with sentences that concluded plausibly or implausibly. Our analysis focused on the hidden states sampled at each model layer. To investigate how violations are encoded, we utilized two complementary probes. First, we conducted a per-layer detection using a linear probe. Our findings revealed that a simple linear decoder struggled to distinguish between plausible and implausible endings in the lowest third of the model's layers. However, its accuracy sharply increased in the middle blocks, reaching a peak just before the top layers. Second, we examined the effective dimensionality of the encoded violation. Initially, the violation widens the representational subspace, followed by a collapse after a mid-stack bottleneck. This might indicate an exploratory phase that transitions into rapid consolidation. Taken together, these results contemplate the idea of alignment with classical psycholinguistic findings in human reading, where semantic anomalies are detected only after syntactic resolution, occurring later in the online processing sequence.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19418v1" target="_blank" rel="noopener noreferrer">
                视觉思维链：通过连续视觉标记教导视觉语言模型更好地观察与思考
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Chain-of-Visual-Thought: Teaching VLMs to See and Think Better with Continuous Visual Tokens
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yiming Qin, Bomin Wei, Jiaxin Ge, Konstantinos Kallidromitis, Stephanie Fu, Trev...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于视觉语言模型（VLM）的改进，属于VLM技术范畴。虽然VLM技术本身与推荐/搜索系统的异构数据处理有类比潜力（如将用户序列和上下文特征视为不同模态），但论文标题主要强调视觉理解和推理能力的提升，未明确指向推荐/搜索/广告的具体应用场景，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T18:55:19">2025-11-24 18:55:19</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19418v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19418v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language Models (VLMs) excel at reasoning in linguistic space but struggle with perceptual understanding that requires dense visual perception, e.g., spatial reasoning and geometric awareness. This limitation stems from the fact that current VLMs have limited mechanisms to capture dense visual information across spatial dimensions. We introduce Chain-of-Visual-Thought (COVT), a framework that enables VLMs to reason not only in words but also through continuous visual tokens-compact latent representations that encode rich perceptual cues. Within a small budget of roughly 20 tokens, COVT distills knowledge from lightweight vision experts, capturing complementary properties such as 2D appearance, 3D geometry, spatial layout, and edge structure. During training, the VLM with COVT autoregressively predicts these visual tokens to reconstruct dense supervision signals (e.g., depth, segmentation, edges, and DINO features). At inference, the model reasons directly in the continuous visual token space, preserving efficiency while optionally decoding dense predictions for interpretability. Evaluated across more than ten diverse perception benchmarks, including CV-Bench, MMVP, RealWorldQA, MMStar, WorldMedQA, and HRBench, integrating COVT into strong VLMs such as Qwen2.5-VL and LLaVA consistently improves performance by 3% to 16% and demonstrates that compact continuous visual thinking enables more precise, grounded, and interpretable multimodal intelligence.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19380v1" target="_blank" rel="noopener noreferrer">
                UISearch：基于图嵌入的多模态企业用户界面截图检索
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UISearch: Graph-Based Embeddings for Multimodal Enterprise UI Screenshots Retrieval
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Maroun Ayli, Youssef Bakouny, Tushar Sharma, Nader Jalloul, Hani Seifeddine, Rim...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态检索，与搜索领域有一定相关性，但主要聚焦于企业UI截图这一特定场景，而非通用的推荐或广告系统。其图嵌入方法可能在处理异构数据方面提供一些启发，但直接应用于RecSys/Search/Ads的潜力有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T18:20:08">2025-11-24 18:20:08</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19380v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19380v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Enterprise software companies maintain thousands of user interface screens across products and versions, creating critical challenges for design consistency, pattern discovery, and compliance check. Existing approaches rely on visual similarity or text semantics, lacking explicit modeling of structural properties fundamental to user interface (UI) composition. We present a novel graph-based representation that converts UI screenshots into attributed graphs encoding hierarchical relationships and spatial arrangements, potentially generalizable to document layouts, architectural diagrams, and other structured visual domains. A contrastive graph autoencoder learns embeddings preserving multi-level similarity across visual, structural, and semantic properties. The comprehensive analysis demonstrates that our structural embeddings achieve better discriminative power than state-of-the-art Vision Encoders, representing a fundamental advance in the expressiveness of the UI representation. We implement this representation in UISearch, a multi-modal search framework that combines structural embeddings with semantic search through a composable query language. On 20,396 financial software UIs, UISearch achieves 0.92 Top-5 accuracy with 47.5ms median latency (P95: 124ms), scaling to 20,000+ screens. The hybrid indexing architecture enables complex queries and supports fine-grained UI distinction impossible with vision-only approaches.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19350v1" target="_blank" rel="noopener noreferrer">
                可扩展的轻量参数谱方法用于聚类短文本嵌入及其基于凝聚力的评估指标
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section; hidden by the `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
        <!-- English title and abstract carry lang="en" because the page itself is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Scalable Parameter-Light Spectral Method for Clustering Short Text Embeddings with a Cohesion-Based Evaluation Metric
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nikita Neveditsin, Pawan Lingras, Vijay Mago
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种用于聚类短文本嵌入的谱方法，虽然聚类技术在推荐系统中可用于用户分群或内容组织，但该方法主要关注通用的文本聚类问题，没有明确针对推荐、搜索或广告领域的特定需求。作为基础技术，它可能间接应用于推荐系统的用户画像构建，但缺乏与LLM、Transformer架构或异构数据建模的直接关联，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T17:52:58">2025-11-24 17:52:58</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19350v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19350v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CL</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Clustering short text embeddings is a foundational task in natural language processing, yet remains challenging due to the need to specify the number of clusters in advance. We introduce a scalable spectral method that estimates the number of clusters directly from the structure of the Laplacian eigenspectrum, constructed using cosine similarities and guided by an adaptive sampling strategy. This sampling approach enables our estimator to efficiently scale to large datasets without sacrificing reliability. To support intrinsic evaluation of cluster quality without ground-truth labels, we propose the Cohesion Ratio, a simple and interpretable evaluation metric that quantifies how much intra-cluster similarity exceeds the global similarity background. It has an information-theoretic motivation inspired by mutual information, and in our experiments it correlates closely with extrinsic measures such as normalized mutual information and homogeneity. Extensive experiments on six short-text datasets and four modern embedding models show that standard algorithms like K-Means and HAC, when guided by our estimator, significantly outperform popular parameter-light methods such as HDBSCAN, OPTICS, and Leiden. These results demonstrate the practical value of our spectral estimator and Cohesion Ratio for unsupervised organization and evaluation of short text data. Implementation of our estimator of k and Cohesion Ratio, along with code for reproducing the experiments, is available at https://anonymous.4open.science/r/towards_clustering-0C2E.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19149v1" target="_blank" rel="noopener noreferrer">
                从像素到帖子：检索增强的时尚图像描述与标签生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            <!-- English title inside a zh-CN page, hence lang="en". -->
            From Pixels to Posts: Retrieval-Augmented Fashion Captioning and Hashtag Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <!-- Author list; the user icon is decorative. -->
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Moazzam Umer Gondal, Hamad Ul Qudous, Daniya Siddiqui, Asma Ahmad Farhan
        </div>
        
        
        
        
        <div class="mb-2">
            <!-- Recommendation rationale; the thumbs-up icon is decorative. -->
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注时尚领域的图像描述和标签生成，属于内容生成任务，与纯粹的推荐系统、搜索或广告排名相关性较弱。虽然检索增强技术可能对搜索有一定启发，但核心应用场景是AIGC和内容生成，属于需要排除的领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-11-24 14:13:57
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2511.19149v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2511.19149v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper introduces the retrieval-augmented framework for automatic fashion caption and hashtag generation, combining multi-garment detection, attribute reasoning, and Large Language Model (LLM) prompting. The system aims to produce visually grounded, descriptive, and stylistically interesting text for fashion imagery, overcoming the limitations of end-to-end captioners that have problems with attribute fidelity and domain generalization. The pipeline combines a YOLO-based detector for multi-garment localization, k-means clustering for dominant color extraction, and a CLIP-FAISS retrieval module for fabric and gender attribute inference based on a structured product index. These attributes, together with retrieved style examples, create a factual evidence pack that is used to guide an LLM to generate human-like captions and contextually rich hashtags. A fine-tuned BLIP model is used as a supervised baseline model for comparison. Experimental results show that the YOLO detector is able to obtain a mean Average Precision (mAP@0.5) of 0.71 for nine categories of garments. The RAG-LLM pipeline generates expressive attribute-aligned captions and achieves mean attribute coverage of 0.80 with full coverage at the 50% threshold in hashtag generation, whereas BLIP gives higher lexical overlap and lower generalization. The retrieval-augmented approach exhibits better factual grounding, less hallucination, and great potential for scalable deployment in various clothing domains. These results demonstrate the use of retrieval-augmented generation as an effective and interpretable paradigm for automated and visually grounded fashion content generation.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.18864v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18864v1" target="_blank" rel="noopener noreferrer">
                修剪前先思考：针对大型推理模型修剪的选择性自生成校准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Think Before You Prune: Selective Self-Generated Calibration for Pruning Large Reasoning Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yang Xiang, Yixin Ji, Juntao Li, Min Zhang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注大型推理模型的修剪技术，属于模型效率优化范畴。虽然修剪技术作为Transformer架构的效率优化方法具有潜在应用价值，但论文标题明确聚焦于推理模型而非推荐/搜索/广告系统，且未提及任何具体的应用场景。这种通用模型压缩技术对推荐系统可能有间接价值，但直接相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 08:08:19
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18864v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18864v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Reasoning Models (LRMs) have demonstrated remarkable performance on complex reasoning benchmarks. However, their long chain-of-thought reasoning processes incur significant inference overhead. Pruning has emerged as a promising approach to reducing computational costs. However, existing efforts have primarily focused on large language models (LLMs), while pruning LRMs remains unexplored. In this work, we conduct the first empirical study on pruning LRMs and show that directly applying existing pruning techniques fails to yield satisfactory results. Our findings indicate that using self-generated reasoning data for calibration can substantially improve pruning performance. We further investigate how the difficulty and length of reasoning data affect pruning outcomes. Our analysis reveals that challenging and moderately long self-generated reasoning data serve as ideal calibration data. Based on these insights, we propose a Selective Self-Generated Reasoning (SSGR) data construction strategy to provide effective calibration data for pruning LRMs. Experimental results on the DeepSeek-R1-Distill model series validate that our strategy improves the reasoning ability of pruned LRMs by 10%-13% compared to general pruning methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.18751v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18751v1" target="_blank" rel="noopener noreferrer">
                基于分布的特征恢复与融合的鲁棒多模态情感分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Robust Multimodal Sentiment Analysis with Distribution-Based Feature Recovery and Fusion
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Daiqing Wu, Dongbao Yang, Can Ma, Yu Zhou
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及多模态建模，但其核心关注情感分析这一NLP任务，而非推荐系统、搜索或广告中的核心问题。尽管多模态特征融合技术可能对处理异构数据有启发，但缺乏明确的RecSys/Search/Ads应用场景，且情感分析本身属于纯粹的NLP应用领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 04:24:33
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18751v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18751v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                As posts on social media increase rapidly, analyzing the sentiments embedded in image-text pairs has become a popular research topic in recent years. Although existing works achieve impressive accomplishments in simultaneously harnessing image and text information, they lack the considerations of possible low-quality and missing modalities. In real-world applications, these issues might frequently occur, leading to urgent needs for models capable of predicting sentiment robustly. Therefore, we propose a Distribution-based feature Recovery and Fusion (DRF) method for robust multimodal sentiment analysis of image-text pairs. Specifically, we maintain a feature queue for each modality to approximate their feature distributions, through which we can simultaneously handle low-quality and missing modalities in a unified framework. For low-quality modalities, we reduce their contributions to the fusion by quantitatively estimating modality qualities based on the distributions. For missing modalities, we build inter-modal mapping relationships supervised by samples and distributions, thereby recovering the missing modalities from available ones. In experiments, two disruption strategies that corrupt and discard some modalities in samples are adopted to mimic the low-quality and missing modalities in various real-world scenarios. Through comprehensive experiments on three publicly available image-text datasets, we demonstrate the universal improvements of DRF compared to SOTA methods under both two strategies, validating its effectiveness in robust multimodal sentiment analysis.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19433v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19433v1" target="_blank" rel="noopener noreferrer">
                动作分块中的混合视野方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Mixture of Horizons in Action Chunking
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dong Jing, Gang Wang, Jiaqi Liu, Weiliang Tang, Zelong Sun, Yunchao Yao, Zhenyu ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题涉及动作分块和混合视野方法，可能属于序列建模或行为预测领域。虽然动作分块在用户行为序列建模中有潜在应用，但标题过于模糊，无法明确判断其与推荐系统、搜索或广告的具体关联。缺乏足够的上下文来确定其是否属于核心推荐系统进展或具有明确的Transformer架构改进。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 18:59:51
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19433v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19433v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (contains raw LaTeX markup as delivered). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-language-action (VLA) models have shown remarkable capabilities in robotic manipulation, but their performance is sensitive to the $\textbf{action chunk length}$ used during training, termed $\textbf{horizon}$. Our empirical study reveals an inherent trade-off: longer horizons provide stronger global foresight but degrade fine-grained accuracy, while shorter ones sharpen local control yet struggle on long-term tasks, implying fixed choice of single horizons being suboptimal. To mitigate the trade-off, we propose a $\textbf{mixture of horizons (MoH)}$ strategy. MoH rearranges the action chunk into several segments with different horizons, processes them in parallel with a shared action transformer, and fuses outputs with a light linear gate. It has three appealing benefits. 1) MoH exploits long-term foresight and short-term precision jointly within a single model, improving both performance and generalizability to complex tasks. 2) MoH is plug-and-play for full-attention action modules with minimal training or inference overhead. 3) MoH enables dynamic inference with adaptive horizons, which selects stable actions through cross-horizon consensus, achieving 2.5$\times$ higher throughput than baselines while preserving superior performance. Extensive experiments over flow-based policies $π_0$, $π_{0.5}$, and one-step regression policy $π_{\text{reg}}$ demonstrate that MoH yields consistent and significant gains on both simulations and real-world tasks. Notably, under mixed-task setting, $π_{0.5}$ with MoH reaches a new state-of-the-art with 99$\%$ average success rate on LIBERO after only $30k$ training iterations. Project page: https://github.com/Timsty1/MixtureOfHorizons
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19261v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19261v1" target="_blank" rel="noopener noreferrer">
                LAST：为通用视觉语言模型学习在空间和时间维度进行思考
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LAST: LeArning to Think in Space and Time for Generalist Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuai Wang, Daoan Zhang, Tianyi Bai, Shitong Shao, Jiebo Luo, Jiaheng Wei
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉语言模型的通用能力提升，特别是空间和时间维度的推理能力。虽然视觉语言模型架构与VLM类比处理异构数据的理念有一定关联，但论文标题未明确展示其在推荐系统、搜索或广告领域的直接应用潜力。核心焦点更偏向通用视觉语言能力而非特定领域的应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 16:13:26
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19261v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19261v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Humans can perceive and understand 3D space and long videos from sequential visual observations. But do vision-language models (VLMs) can? Recent work demonstrates that even state-of-the-art VLMs still struggle to understand 3D space and long videos, although they are powerful in typical vision-language tasks. Current methods often rely on specialized architectural designs to improve performance for 3D tasks and video understanding tasks separately. In contrast, we propose LAST, short for LeArn to Think in Space and Time, to jointly improve 3D spatial and long video understanding for general VLMs with only a set of 2D images as inputs. LAST makes VLMs think in space and time rather than only with text before giving the final answer, building visual thinking trajectories in 3D space and temporal dimension. We demonstrate the effectiveness of LAST in two scenarios: 1) zero-shot, where we directly prompt proprietary models; and 2) fine-tuning general VLMs with data that include thinking trajectories in 3D space and time. We show that LAST brings substantial gains in various benchmarks, including 3 spatial understanding, 4 video understanding, and 3 image understanding tasks. Notably, 15.8% gains on EgoSchema with GPT-4o in a zero-shot manner and 8.3 gains on VSI-Bench compared with Qwen2.5-VL-7B.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19147v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19147v1" target="_blank" rel="noopener noreferrer">
                基于多个基础模型的无源域自适应协同学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Collaborative Learning with Multiple Foundation Models for Source-Free Domain Adaptation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Huisoo Lee, Jisu Han, Hyunsouk Cho, Wonjun Hwang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及基础模型和域自适应技术，这些可能作为使能技术应用于推荐系统或搜索中的跨域推荐和冷启动问题。然而，标题中明确提到'无源域自适应'，这主要属于迁移学习领域，与当前关注的核心推荐系统进展、直接LLM应用或Transformer架构创新的直接关联性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 14:12:22
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19147v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19147v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Source-Free Domain Adaptation (SFDA) aims to adapt a pre-trained source model to an unlabeled target domain without access to source data. Recent advances in Foundation Models (FMs) have introduced new opportunities for leveraging external semantic knowledge to guide SFDA. However, relying on a single FM is often insufficient, as it tends to bias adaptation toward a restricted semantic coverage, failing to capture diverse contextual cues under domain shift. To overcome this limitation, we propose a Collaborative Multi-foundation Adaptation (CoMA) framework that jointly leverages two different FMs (e.g., CLIP and BLIP) with complementary properties to capture both global semantics and local contextual cues. Specifically, we employ a bidirectional adaptation mechanism that (1) aligns different FMs with the target model for task adaptation while maintaining their semantic distinctiveness, and (2) transfers complementary knowledge from the FMs to the target model. To ensure stable adaptation under mini-batch training, we introduce Decomposed Mutual Information (DMI) that selectively enhances true dependencies while suppressing false dependencies arising from incomplete class coverage. Extensive experiments demonstrate that our method consistently outperforms existing state-of-the-art SFDA methods across four benchmarks, including Office-31, Office-Home, DomainNet-126, and VisDA, under the closed-set setting, while also achieving best results on partial-set and open-set variants.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.18960v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18960v1" target="_blank" rel="noopener noreferrer">
                AVA-VLA：通过主动视觉注意力改进视觉-语言-动作模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Lei Xiao, Jifeng Li, Juntao Gao, Feiyang Ye, Yan Jin, Jingjing Qian, Jing Zhang,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉-语言-动作模型中的主动视觉注意力机制，这属于多模态学习范畴。虽然视觉-语言模型在概念上与处理异构数据的统一建模相关，但该论文明确聚焦于动作控制领域，与推荐系统、搜索或广告的核心技术关联度较低。主动注意力机制可能对交互式推荐界面有潜在启发，但应用场景不够直接。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 10:22:28
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18960v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18960v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language-Action (VLA) models have demonstrated remarkable capabilities in embodied AI tasks. However, existing VLA models, often built upon Vision-Language Models (VLMs), typically process dense visual inputs independently at each timestep. This approach implicitly models the task as a Markov Decision Process (MDP). However, this history-agnostic design is suboptimal for effective visual token processing in dynamic sequential decision-making, as it fails to leverage the context of history. To address this limitation, we reformulate the problem from a Partially Observable Markov Decision Process (POMDP) perspective and propose a novel framework named AVA-VLA. Inspired by the POMDP that the action generation should be conditioned on the belief state. AVA-VLA introduces Active Visual Attention (AVA) to dynamically modulate visual processing. It achieves this by leveraging the recurrent state, which is a neural approximation of the agent's belief state derived from the previous decision step. Specifically, the AVA module uses the recurrent state to compute the soft weights to actively process task-relevant visual tokens based on its historical context. Comprehensive evaluations demonstrate that AVA-VLA achieves state-of-the-art performance across popular robotic benchmarks, including LIBERO and CALVIN. Furthermore, real-world deployments on a dual-arm robot platform validate the framework's practical applicability and robust sim-to-real transferability.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.18874v1. While the card carries .collapsed-level-1, the page stylesheet hides its .paper-details block. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18874v1" target="_blank" rel="noopener noreferrer">
                GContextFormer：一种用于多模态轨迹预测的全局上下文感知混合多头注意力方法，采用缩放加性聚合
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            GContextFormer: A global context-aware hybrid multi-head attention approach with scaled additive aggregation for multimodal trajectory prediction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuzhi Chen, Yuanchang Xie, Lei Zhao, Pan Liu, Yajie Zou, Chen Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注轨迹预测任务，属于计算机视觉和自动驾驶领域，与推荐系统、搜索或广告的核心技术关联较弱。虽然Transformer架构的改进（如混合多头注意力）可能对推荐系统有间接启发，但论文的应用场景和问题定义与当前关注领域相距甚远，缺乏明确的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 08:28:42
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18874v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18874v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.MA</span><span class="category-tag">cs.RO</span><span class="category-tag">cs.SI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multimodal trajectory prediction generates multiple plausible future trajectories to address vehicle motion uncertainty from intention ambiguity and execution variability. However, HD map-dependent models suffer from costly data acquisition, delayed updates, and vulnerability to corrupted inputs, causing prediction failures. Map-free approaches lack global context, with pairwise attention over-amplifying straight patterns while suppressing transitional patterns, resulting in motion-intention misalignment. This paper proposes GContextFormer, a plug-and-play encoder-decoder architecture with global context-aware hybrid attention and scaled additive aggregation achieving intention-aligned multimodal prediction without map reliance. The Motion-Aware Encoder builds scene-level intention prior via bounded scaled additive aggregation over mode-embedded trajectory tokens and refines per-mode representations under shared global context, mitigating inter-mode suppression and promoting intention alignment. The Hierarchical Interaction Decoder decomposes social reasoning into dual-pathway cross-attention: a standard pathway ensures uniform geometric coverage over agent-mode pairs while a neighbor-context-enhanced pathway emphasizes salient interactions, with gating module mediating their contributions to maintain coverage-focus balance. Experiments on eight highway-ramp scenarios from TOD-VT dataset show GContextFormer outperforms state-of-the-art baselines. Compared to existing transformer models, GContextFormer achieves greater robustness and concentrated improvements in high-curvature and transition zones via spatial distributions. Interpretability is achieved through motion mode distinctions and neighbor context modulation exposing reasoning attribution. The modular architecture supports extensibility toward cross-domain multimodal reasoning tasks. Source: https://fenghy-chen.github.io/sources/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18749v1" target="_blank" rel="noopener noreferrer">
                大型语言模型需要经过筛选的上下文才能进行可靠的政治事实核查——即使具备推理能力和网络搜索功能
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Large Language Models Require Curated Context for Reliable Political Fact-Checking -- Even with Reasoning and Web Search
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Matthew R. DeVerna, Kai-Cheng Yang, Harry Yaojun Yan, Filippo Menczer
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注政治事实核查这一特定应用领域，这属于内容验证而非推荐、搜索或广告系统的核心排名问题。虽然提到了LLM和推理能力，但焦点是事实核查的可靠性，与RecSys/Search/Ads的核心技术进展或直接应用关联度极低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T04:22:32">2025-11-24 04:22:32</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18749v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18749v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CY</span><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) have raised hopes for automated end-to-end fact-checking, but prior studies report mixed results. As mainstream chatbots increasingly ship with reasoning capabilities and web search tools -- and millions of users already rely on them for verification -- rigorous evaluation is urgent. We evaluate 15 recent LLMs from OpenAI, Google, Meta, and DeepSeek on more than 6,000 claims fact-checked by PolitiFact, comparing standard models with reasoning- and web-search variants. Standard models perform poorly, reasoning offers minimal benefits, and web search provides only moderate gains, despite fact-checks being available on the web. In contrast, a curated RAG system using PolitiFact summaries improved macro F1 by 233% on average across model variants. These findings suggest that giving models access to curated high-quality context is a promising path for automated fact-checking.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19399v1" target="_blank" rel="noopener noreferrer">
                DR Tulu：基于演进式评估准则的强化学习在深度研究中的应用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DR Tulu: Reinforcement Learning with Evolving Rubrics for Deep Research
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Rulin Shao, Akari Asai, Shannon Zejiang Shen, Hamish Ivison, Varsha Kishore, Jin...
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注强化学习在深度研究领域的应用，虽然提到了强化学习技术，但核心应用场景（深度研究）与推荐系统、搜索或广告领域没有直接关联。论文标题中未提及任何与Transformer架构、LLM技术或推荐系统相关的具体技术要素，因此对您当前关注的技术方向相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:35:54">2025-11-24 18:35:54</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19399v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19399v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Deep research models perform multi-step research to produce long-form, well-attributed answers. However, most open deep research models are trained on easily verifiable short-form QA tasks via reinforcement learning with verifiable rewards (RLVR), which does not extend to realistic long-form tasks. We address this with Reinforcement Learning with Evolving Rubrics (RLER), in which we construct and maintain rubrics that co-evolve with the policy model during training; this allows the rubrics to incorporate information that the model has newly explored and to provide discriminative, on-policy feedback. Using RLER, we develop Deep Research Tulu (DR Tulu-8B), the first open model that is directly trained for open-ended, long-form deep research. Across four long-form deep research benchmarks in science, healthcare and general domains, DR Tulu substantially outperforms existing open deep research models, and matches or exceeds proprietary deep research systems, while being significantly smaller and cheaper per query. To facilitate future research, we release all data, models, and code, including our new MCP-based agent infrastructure for deep research systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19168v1" target="_blank" rel="noopener noreferrer">
                RAVEN++：通过主动强化推理精确定位广告视频中的细粒度违规行为
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            RAVEN++: Pinpointing Fine-Grained Violations in Advertisement Videos with Active Reinforcement Reasoning
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Deyi Ji, Yuekui Yang, Liqun Liu, Peng Shu, Haiyang Wu, Shaogang Tang, Xudong Che...
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注广告视频内容审核和违规检测，这属于广告领域的非排名相关主题。虽然涉及强化学习，但聚焦于内容安全审查而非推荐、搜索或广告排名优化。论文的技术方向与当前关注的LLM技术、Transformer架构进展或异构数据统一建模等核心方向关联度较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T14:32:13">2025-11-24 14:32:13</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19168v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19168v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Advertising (Ad) is a cornerstone of the digital economy, yet the moderation of video advertisements remains a significant challenge due to their complexity and the need for precise violation localization. While recent advancements, such as the RAVEN model, have improved coarse-grained violation detection, critical gaps persist in fine-grained understanding, explainability, and generalization. To address these limitations, we propose RAVEN++, a novel framework that introduces three key innovations: 1) Active Reinforcement Learning (RL), which dynamically adapts training to samples of varying difficulty; 2) Fine-Grained Violation Understanding, achieved through hierarchical reward functions and reasoning distillation; and 3) Progressive Multi-Stage Training, which systematically combines knowledge injection, curriculum-based passive RL, and active RL. Extensive experiments on both public and proprietary datasets, on both offline scenarios and online deployed A/B Testing, demonstrate that RAVEN++ outperforms general-purpose LLMs and specialized models like RAVEN in terms of fine-grained violation understanding, reasoning capabilities, and generalization ability.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19166v1" target="_blank" rel="noopener noreferrer">
                大型语言模型中真理的表征稳定性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Representational Stability of Truth in Large Language Models
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Samantha Dies, Courtney Maynard, Germans Savcisens, Tina Eliassi-Rad
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究LLM中真理概念的表示稳定性，这属于LLM内部表示特性的理论研究。虽然涉及LLM技术，但论文聚焦于真理表示这一哲学/认知层面的问题，与推荐系统、搜索或广告的实际应用缺乏直接关联。该研究没有展示在RecSys/Search/Ads领域的潜在应用价值，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T14:28:50">2025-11-24 14:28:50</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19166v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19166v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) are widely used for factual tasks such as "What treats asthma?" or "What is the capital of Latvia?". However, it remains unclear how stably LLMs encode distinctions between true, false, and neither-true-nor-false content in their internal probabilistic representations. We introduce representational stability as the robustness of an LLM's veracity representations to perturbations in the operational definition of truth. We assess representational stability by (i) training a linear probe on an LLM's activations to separate true from not-true statements and (ii) measuring how its learned decision boundary shifts under controlled label changes. Using activations from sixteen open-source models and three factual domains, we compare two types of neither statements. The first are fact-like assertions about entities we believe to be absent from any training data. We call these unfamiliar neither statements. The second are nonfactual claims drawn from well-known fictional contexts. We call these familiar neither statements. The unfamiliar statements induce the largest boundary shifts, producing up to $40\%$ flipped truth judgements in fragile domains (such as word definitions), while familiar fictional statements remain more coherently clustered and yield smaller changes ($\leq 8.2\%$). These results suggest that representational stability stems more from epistemic familiarity than from linguistic form. More broadly, our approach provides a diagnostic for auditing and training LLMs to preserve coherent truth assignments under semantic uncertainty, rather than optimizing for output accuracy alone.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19122v1" target="_blank" rel="noopener noreferrer">
                基于大语言模型的情感增强多任务学习用于方面类别情感分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Emotion-Enhanced Multi-Task Learning with LLMs for Aspect Category Sentiment Analysis
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yaping Chai, Haoran Xie, Joe S. Qin
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注情感分析和多任务学习，属于纯粹的NLP应用领域。虽然涉及LLMs，但缺乏与推荐系统、搜索或广告领域的明确联系，也没有展示在异构数据处理方面的创新。情感分析在评论理解中有潜在应用，但论文标题未表明这种跨领域适用性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T13:52:42">2025-11-24 13:52:42</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19122v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19122v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Aspect category sentiment analysis (ACSA) has achieved remarkable progress with large language models (LLMs), yet existing approaches primarily emphasize sentiment polarity while overlooking the underlying emotional dimensions that shape sentiment expressions. This limitation hinders the model's ability to capture fine-grained affective signals toward specific aspect categories. To address this limitation, we introduce a novel emotion-enhanced multi-task ACSA framework that jointly learns sentiment polarity and category-specific emotions grounded in Ekman's six basic emotions. Leveraging the generative capabilities of LLMs, our approach enables the model to produce emotional descriptions for each aspect category, thereby enriching sentiment representations with affective expressions. Furthermore, to ensure the accuracy and consistency of the generated emotions, we introduce an emotion refinement mechanism based on the Valence-Arousal-Dominance (VAD) dimensional framework. Specifically, emotions predicted by the LLM are projected onto a VAD space, and those inconsistent with their corresponding VAD coordinates are re-annotated using a structured LLM-based refinement strategy. Experimental results demonstrate that our approach significantly outperforms strong baselines on all benchmark datasets. This underlines the effectiveness of integrating affective dimensions into ACSA.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19083v1" target="_blank" rel="noopener noreferrer">
                基于知识检索、消歧和反思分析的多智能体LLM框架用于多领域低资源上下文命名实体识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Multi-Agent LLM Framework for Multi-Domain Low-Resource In-Context NER via Knowledge Retrieval, Disambiguation and Reflective Analysis
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wenxuan Mu, Jinzhong Ning, Di Zhao, Yijia Zhang
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注命名实体识别(NER)这一特定NLP任务，属于纯粹的NLP应用领域。虽然提到了多智能体LLM框架和知识检索技术，但这些技术的应用场景局限于NER任务，没有明确展示在推荐系统、搜索或广告领域的潜在应用价值。论文的核心焦点是低资源NLP问题，而非推荐/搜索/广告系统的核心挑战。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T13:23:34">2025-11-24 13:23:34</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19083v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19083v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In-context learning (ICL) with large language models (LLMs) has emerged as a promising paradigm for named entity recognition (NER) in low-resource scenarios. However, existing ICL-based NER methods suffer from three key limitations: (1) reliance on dynamic retrieval of annotated examples, which is problematic when annotated data is scarce; (2) limited generalization to unseen domains due to the LLM's insufficient internal domain knowledge; and (3) failure to incorporate external knowledge or resolve entity ambiguities. To address these challenges, we propose KDR-Agent, a novel multi-agent framework for multi-domain low-resource in-context NER that integrates Knowledge retrieval, Disambiguation, and Reflective analysis. KDR-Agent leverages natural-language type definitions and a static set of entity-level contrastive demonstrations to reduce dependency on large annotated corpora. A central planner coordinates specialized agents to (i) retrieve factual knowledge from Wikipedia for domain-specific mentions, (ii) resolve ambiguous entities via contextualized reasoning, and (iii) reflect on and correct model predictions through structured self-assessment. Experiments across ten datasets from five domains demonstrate that KDR-Agent significantly outperforms existing zero-shot and few-shot ICL baselines across multiple LLM backbones. The code and data can be found at https://github.com/MWXGOD/KDR-Agent.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19009v1" target="_blank" rel="noopener noreferrer">
                通过安全表示理解并缓解大型语言模型的过度拒绝问题
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Understanding and Mitigating Over-refusal for Large Language Models via Safety Representation
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Junbo Zhang, Ran Chen, Qianli Zhou, Xinyang Deng, Wen Jiang
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的安全性和拒绝行为，属于纯粹的LLM安全研究范畴。虽然涉及LLM技术，但专注于安全拒绝机制而非推荐系统、搜索或广告的应用潜力，与当前关注的四大方向均无直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:38:53">2025-11-24 11:38:53</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19009v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19009v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CR</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models demonstrate powerful capabilities across various natural language processing tasks, yet they also harbor safety vulnerabilities. To enhance LLM safety, various jailbreak defense methods have been proposed to guard against harmful outputs. However, improvements in model safety often come at the cost of severe over-refusal, failing to strike a good balance between safety and usability. In this paper, we first analyze the causes of over-refusal from a representation perspective, revealing that over-refusal samples reside at the boundary between benign and malicious samples. Based on this, we propose MOSR, designed to mitigate over-refusal by intervening the safety representation of LLMs. MOSR incorporates two novel components: (1) Overlap-Aware Loss Weighting, which determines the erasure weight for malicious samples by quantifying their similarity to pseudo-malicious samples in the representation space, and (2) Context-Aware Augmentation, which supplements the necessary context for rejection decisions by adding harmful prefixes before rejection responses. Experiments demonstrate that our method outperforms existing approaches in mitigating over-refusal while largely maintaining safety. Overall, we advocate that future defense methods should strike a better balance between safety and over-refusal.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: paper title (opens alphaXiv in a new tab) and the score badge. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18992v1" target="_blank" rel="noopener noreferrer">
                用于聚类和嵌入的分类EM-PCA
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star is decorative; hide it from assistive tech so only the score text is announced. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title; tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Classification EM-PCA for clustering and embedding
        </div>

        <!-- Author list; the icon is decorative and hidden from assistive tech. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zineddine Tighidet, Lazhar Labiod, Mohamed Nadif
        </div>

        <!-- Recommendation rationale shown to the reader. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种结合EM算法和PCA的聚类与嵌入方法，属于传统机器学习技术。虽然嵌入技术在推荐系统中有所应用，但该方法没有明确涉及推荐、搜索或广告领域的特定应用场景，也没有与LLM、Transformer架构或异构数据处理等当前关注领域建立直接联系。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value carries no UTC offset because the page does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:18:59">2025-11-24 11:18:59</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18992v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18992v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">stat.ML</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The mixture model is undoubtedly one of the greatest contributions to clustering. For continuous data, Gaussian models are often used and the Expectation-Maximization (EM) algorithm is particularly suitable for estimating parameters from which clustering is inferred. If these models are particularly popular in various domains including image clustering, they however suffer from the dimensionality and also from the slowness of convergence of the EM algorithm. However, the Classification EM (CEM) algorithm, a classifying version, offers a fast convergence solution while dimensionality reduction still remains a challenge. Thus we propose in this paper an algorithm combining simultaneously and non-sequentially the two tasks --Data embedding and Clustering-- relying on Principal Component Analysis (PCA) and CEM. We demonstrate the interest of such approach in terms of clustering and data embedding. We also establish different connections with other clustering approaches.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18891v1" target="_blank" rel="noopener noreferrer">
                大语言模型贝叶斯优化的可复现性研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Reproducibility Study of Large Language Model Bayesian Optimization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Adam Rychert, Gasper Spagnolo, Evgenii Posashkov
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然贝叶斯优化是超参数调优的重要方法，但该论文主要关注LLM训练过程的复现性研究，属于方法论验证范畴。这种复现性研究对RecSys/Search/Ads的实际应用价值有限，因为它不涉及核心架构创新、新应用场景或效率提升等直接影响业务的关键技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T08:48:38">2025-11-24 08:48:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18891v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18891v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this reproducibility study, we revisit the LLAMBO framework of Daxberger et al. (2024), a prompting-based Bayesian optimization (BO) method that uses large language models as discriminative surrogates and acquisition optimizers via text-only interactions. We replicate the core Bayesmark and HPOBench experiments under the original evaluation protocol, but replace GPT-3.5 with the open-weight Llama 3.1 70B model used for all text encoding components. Our results broadly confirm the main claims of LLAMBO. Contextual warm starting via textual problem and hyperparameter descriptions substantially improves early regret behaviour and reduces variance across runs. LLAMBO's discriminative surrogate is weaker than GP or SMAC as a pure single task regressor, yet benefits from cross task semantic priors induced by the language model. Ablations that remove textual context markedly degrade predictive accuracy and calibration, while the LLAMBO candidate sampler consistently generates higher quality and more diverse proposals than TPE or random sampling. Experiments with smaller backbones (Gemma 27B, Llama 3.1 8B) yield unstable or invalid predictions, suggesting insufficient capacity for reliable surrogate behaviour. Overall, our study shows that the LLAMBO architecture is robust to changing the language model backbone and remains effective when instantiated with Llama 3.1 70B.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18889v1" target="_blank" rel="noopener noreferrer">
                CoreEval：利用真实世界知识自动构建抗污染数据集以实现可靠的LLM评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CoreEval: Automatically Building Contamination-Resilient Datasets with Real-World Knowledge toward Reliable LLM Evaluation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingqian Zhao, Bingbing Wang, Geng Tu, Yice Zhang, Qianlong Wang, Bin Liang, Jin...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM评估中的数据集污染问题，这属于纯粹的LLM评估基准主题，在无关主题列表中明确排除。虽然提到了真实世界知识，但核心焦点是评估可靠性而非在搜索、推荐或广告中的实际应用。该工作没有展示在推荐系统、搜索或广告领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T08:44:29">2025-11-24 08:44:29</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18889v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18889v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget.
             The source abstract's LaTeX bold markup is rendered as <strong> instead of being shown literally. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Data contamination poses a significant challenge to the fairness of LLM evaluations in natural language processing tasks by inadvertently exposing models to test data during training. Current studies attempt to mitigate this issue by modifying existing datasets or generating new ones from freshly collected information. However, these methods fall short of ensuring contamination-resilient evaluation, as they fail to fully eliminate pre-existing knowledge from models or preserve the semantic complexity of the original datasets. To address these limitations, we propose <strong>CoreEval</strong>, a <strong>Co</strong>ntamination-<strong>re</strong>silient <strong>Eval</strong>uation strategy for automatically updating data with real-world knowledge. This approach begins by extracting entity relationships from the original data and leveraging the GDELT database to retrieve relevant, up-to-date knowledge. The retrieved knowledge is then recontextualized and integrated with the original data, which is refined and restructured to ensure semantic coherence and enhanced task relevance. Ultimately, a robust data reflection mechanism is employed to iteratively verify and refine labels, ensuring consistency between the updated and original datasets. Extensive experiments on updated datasets validate the robustness of CoreEval, demonstrating its effectiveness in mitigating performance overestimation caused by data contamination.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18850v1" target="_blank" rel="noopener noreferrer">
                基于大语言模型驱动代码进化的认知Alpha挖掘
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Cognitive Alpha Mining via LLM-Driven Code-Based Evolution
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fengyuan Liu, Huang Yi, Sichun Luo, Yuqi Wang, Yazheng Yang, Xinye Li, Zefa Hu, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了使用LLM进行代码生成和进化的技术，这可能属于'Enabling LLM Tech'范畴，但'认知Alpha挖掘'更偏向金融量化交易领域，与推荐系统、搜索或广告的直接关联性较弱。虽然LLM驱动的代码进化技术有潜力应用于算法优化，但缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T07:45:59">2025-11-24 07:45:59</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18850v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18850v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget.
             LaTeX quote and dash sequences from the source abstract are replaced with the intended typographic characters. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Discovering effective predictive signals, or “alphas,” from financial data with high dimensionality and extremely low signal-to-noise ratio remains a difficult open problem. Despite progress in deep learning, genetic programming, and, more recently, large language model (LLM)–based factor generation, existing approaches still explore only a narrow region of the vast alpha search space. Neural models tend to produce opaque and fragile patterns, while symbolic or formula-based methods often yield redundant or economically ungrounded expressions that generalize poorly. Although different in form, these paradigms share a key limitation: none can conduct broad, structured, and human-like exploration that balances logical consistency with creative leaps. To address this gap, we introduce the Cognitive Alpha Mining Framework (CogAlpha), which combines code-level alpha representation with LLM-driven reasoning and evolutionary search. Treating LLMs as adaptive cognitive agents, our framework iteratively refines, mutates, and recombines alpha candidates through multi-stage prompts and financial feedback. This synergistic design enables deeper thinking, richer structural diversity, and economically interpretable alpha discovery, while greatly expanding the effective search space. Experiments on A-share equities demonstrate that CogAlpha consistently discovers alphas with superior predictive accuracy, robustness, and generalization over existing methods. Our results highlight the promise of aligning evolutionary optimization with LLM-based reasoning for automated and explainable alpha discovery. All source code will be released.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18848v1" target="_blank" rel="noopener noreferrer">
                用于捷克文档摘要的大型语言模型：从历史到现状
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Large Language Models for the Summarization of Czech Documents: From History to the Present
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Václav Tran, Jakub Šmíd, Ladislav Lenc, Jean-Pierre Salmon, Pavel Král
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于特定语言（捷克语）的文档摘要任务，这属于纯粹的LLM应用场景，与推荐系统、搜索或广告的核心技术发展无关。虽然摘要技术在某些搜索场景中可能有间接应用，但论文的特定语言焦点和摘要应用使其与当前关注的核心领域相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T07:40:31">2025-11-24 07:40:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18848v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18848v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text summarization is the task of automatically condensing longer texts into shorter, coherent summaries while preserving the original meaning and key information. Although this task has been extensively studied in English and other high-resource languages, Czech summarization, particularly in the context of historical documents, remains underexplored. This is largely due to the inherent linguistic complexity of Czech and the lack of high-quality annotated datasets. In this work, we address this gap by leveraging the capabilities of Large Language Models (LLMs), specifically Mistral and mT5, which have demonstrated strong performance across a wide range of natural language processing tasks and multilingual settings. In addition, we also propose a translation-based approach that first translates Czech texts into English, summarizes them using an English-language model, and then translates the summaries back into Czech. Our study makes the following main contributions: We demonstrate that LLMs achieve new state-of-the-art results on the SumeCzech dataset, a benchmark for modern Czech text summarization, showing the effectiveness of multilingual LLMs even for morphologically rich, medium-resource languages like Czech. We introduce a new dataset, Posel od Čerchova, designed for the summarization of historical Czech texts. This dataset is derived from digitized 19th-century publications and annotated for abstractive summarization. We provide initial baselines using modern LLMs to facilitate further research in this underrepresented area. By combining cutting-edge models with both modern and historical Czech datasets, our work lays the foundation for further progress in Czech summarization and contributes valuable resources for future research in Czech historical document processing and low-resource summarization more broadly.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18843v1" target="_blank" rel="noopener noreferrer">
                焦点小组分析中神经主题建模的可复现框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Reproducible Framework for Neural Topic Modeling in Focus Group Analysis
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Heger Arfaoui, Mohammed Iheb Hergli, Beya Benzina, Slimane BenMiled
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于主题建模在焦点小组分析中的应用，属于传统的NLP任务范畴。虽然主题建模在推荐系统中可用于内容理解，但该论文强调可复现性和焦点小组分析这一特定应用场景，与当前关注的LLM技术、Transformer架构进展或异构数据统一建模等核心方向关联度较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T07:30:15">2025-11-24 07:30:15</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18843v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18843v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.HC</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Focus group discussions generate rich qualitative data but their analysis traditionally relies on labor-intensive manual coding that limits scalability and reproducibility. We present a rigorous, reproducible computational framework for applying neural topic modeling to focus group transcripts, addressing fundamental methodological challenges: hyperparameter sensitivity, model stability, and validation of interpretability. Using BERTopic applied to ten focus groups exploring HPV vaccine perceptions in Tunisia (1,076 utterances), we conducted systematic evaluation across 27 hyperparameter configurations, assessed stability through bootstrap resampling with 30 replicates per configuration, and validated interpretability through formal human evaluation by three domain experts. Our analysis demonstrates substantial sensitivity to hyperparameter choices and reveals that metric selection for stability assessment must align with analytical goals. A hierarchical merging strategy (extracting fine-grained topics for stability then consolidating for interpretability) effectively navigates the stability-coherence tradeoff, achieving coherence of 0.558 compared to 0.539 for direct extraction. Human validation confirmed topic quality with very good inter-rater reliability (ICC = 0.79, weighted Cohen's kappa = 0.578). Our framework provides practical guidelines that researchers can adapt to their own qualitative research contexts. All code, data processing scripts, and evaluation protocols are publicly available to support reproduction and extension of this work.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18832v1" target="_blank" rel="noopener noreferrer">
                概念而非文档：基于抽象语义表示的上下文概念熵压缩
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Concept than Document: Context Compression via AMR-based Conceptual Entropy
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kaize Shi, Xueyao Sun, Xiaohui Tao, Lin Li, Qika Lin, Guandong Xu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于基于AMR（抽象语义表示）的概念压缩技术，这属于文档/文本压缩的特定领域，与推荐系统、搜索或广告的核心进展缺乏直接关联。虽然上下文压缩在理论上可能应用于长序列建模的效率提升，但论文标题明确聚焦于AMR和概念熵这种高度专业化的NLP技术，缺乏明确的RecSys/Search/Ads应用场景或转换器架构改进。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T07:08:02">2025-11-24 07:08:02</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18832v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18832v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) face information overload when handling long contexts, particularly in Retrieval-Augmented Generation (RAG) where extensive supporting documents often introduce redundant content. This issue not only weakens reasoning accuracy but also increases computational overhead. We propose an unsupervised context compression framework that exploits Abstract Meaning Representation (AMR) graphs to preserve semantically essential information while filtering out irrelevant text. By quantifying node-level entropy within AMR graphs, our method estimates the conceptual importance of each node, enabling the retention of core semantics. Specifically, we construct AMR graphs from raw contexts, compute the conceptual entropy of each node, and screen significant informative nodes to form a condensed and semantically focused context than raw documents. Experiments on the PopQA and EntityQuestions datasets show that our method outperforms vanilla and other baselines, achieving higher accuracy while substantially reducing context length. To the best of our knowledge, this is the first work introducing AMR-based conceptual entropy for context compression, demonstrating the potential of stable linguistic features in context engineering.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18743v1" target="_blank" rel="noopener noreferrer">
                RhinoInsight：通过模型行为和上下文控制机制改进深度研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            RhinoInsight: Improving Deep Research through Control Mechanisms for Model Behavior and Context
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yu Lei, Shuzheng Si, Wei Wang, Yifei Wu, Gang Chen, Fanchao Qi, Maosong Sun
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题主要关注模型行为控制和上下文管理，这属于通用的AI研究工具或框架范畴。虽然控制机制可能间接影响模型可靠性，但没有明确指向推荐系统、搜索或广告的具体应用场景，也未涉及Transformer架构改进或LLM核心技术进步。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T04:12:41">2025-11-24 04:12:41</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18743v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18743v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models are evolving from single-turn responders into tool-using agents capable of sustained reasoning and decision-making for deep research. Prevailing systems adopt a linear pipeline of plan to search to write to a report, which suffers from error accumulation and context rot due to the lack of explicit control over both model behavior and context. We introduce RhinoInsight, a deep research framework that adds two control mechanisms to enhance robustness, traceability, and overall quality without parameter updates. First, a Verifiable Checklist module transforms user requirements into traceable and verifiable sub-goals, incorporates human or LLM critics for refinement, and compiles a hierarchical outline to anchor subsequent actions and prevent non-executable planning. Second, an Evidence Audit module structures search content, iteratively updates the outline, and prunes noisy context, while a critic ranks and binds high-quality evidence to drafted content to ensure verifiability and reduce hallucinations. Our experiments demonstrate that RhinoInsight achieves state-of-the-art performance on deep research tasks while remaining competitive on deep search tasks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18696v1" target="_blank" rel="noopener noreferrer">
                共情级联网络：一种用于减少大型语言模型中社会偏见的多阶段提示技术
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline ".collapsed-level-1 .paper-details" rule sets it to display: none. -->
    <div class="paper-details">
        <!-- Original title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Empathetic Cascading Networks: A Multi-Stage Prompting Technique for Reducing Social Biases in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wangjiaxuan Xin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注减少LLM中的社会偏见，这属于公平性和伦理范畴，属于明确的无关主题。虽然提到了提示技术，但核心应用是偏见缓解而非推荐、搜索或广告系统的实际应用。没有明确的技术机制可以转化为推荐、搜索或广告领域的实际应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags.
             Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T02:32:20">2025-11-24 02:32:20</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18696v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18696v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This report presents the Empathetic Cascading Networks (ECN) framework, a multi-stage prompting method designed to enhance the empathetic and inclusive capabilities of large language models. ECN employs four stages: Perspective Adoption, Emotional Resonance, Reflective Understanding, and Integrative Synthesis, to guide models toward generating emotionally resonant and contextually aware responses. Experimental results demonstrate that ECN achieves the highest Empathy Quotient (EQ) scores across GPT-3.5-turbo and GPT-4, while maintaining competitive Regard and Perplexity metrics. These findings emphasize ECN's potential for applications requiring empathy and inclusivity in conversational AI.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19436v1" target="_blank" rel="noopener noreferrer">
                VDC-Agent：当视频详细描述器通过智能体自我反思实现自我进化时
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            VDC-Agent: When Video Detailed Captioners Evolve Themselves via Agentic Self-Reflection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qiang Wang, Xinyuan Gao, SongLin Dong, Jizhou Han, Jiangyang Li, Yuhang He, Yiho...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频描述生成和智能体自我反思技术，属于计算机视觉和AIGC领域。虽然智能体技术可能有潜在的推荐系统应用，但论文标题明确聚焦于视频描述生成，这属于内容生成而非推荐/搜索/广告的核心排序任务。没有明确证据表明该技术会直接应用于推荐系统、搜索或广告领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:59:56">2025-11-24 18:59:56</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19436v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19436v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.MM</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present VDC-Agent, a self-evolving framework for Video Detailed Captioning that requires neither human annotations nor larger teacher models. The agent forms a closed loop of caption generation, principle-guided scoring (score and textual suggestions), and prompt refinement. When caption quality regresses, a self-reflection path leverages the previous chain-of-thought to amend the update. Running this process on unlabeled videos produces trajectories of (caption, score) pairs. We convert the trajectories into preference tuples and filter out samples with JSON parsing errors, resulting in VDC-Agent-19K, which contains 18,886 automatically constructed pairs. We then fine-tune the base MLLM on this dataset using an easy-to-hard curriculum direct preference optimization. Built on Qwen2.5-VL-7B-Instruct, our VDC-Agent-7B attains state-of-the-art performance on the VDC benchmark with 49.08% average accuracy and 2.50 score, surpassing specialized video captioners and improving over the base model by +5.13% accuracy and +0.27 score at similar inference cost.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19435v1" target="_blank" rel="noopener noreferrer">
                图像到视频模型是优秀的零样本图像编辑器吗？
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Are Image-to-Video Models Good Zero-Shot Image Editors?
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zechuan Zhang, Zhenyuan Chen, Zongxin Yang, Yi Yang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究图像到视频模型在图像编辑任务中的零样本性能，属于计算机视觉和生成式AI领域。虽然标题提到了多模态转换，但焦点是图像编辑而非推荐系统、搜索或广告中的异构数据统一建模。该工作与视觉内容生成更相关，缺乏明确的RecSys/Search/Ads应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:59:54">2025-11-24 18:59:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19435v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19435v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large-scale video diffusion models show strong world simulation and temporal reasoning abilities, but their use as zero-shot image editors remains underexplored. We introduce IF-Edit, a tuning-free framework that repurposes pretrained image-to-video diffusion models for instruction-driven image editing. IF-Edit addresses three key challenges: prompt misalignment, redundant temporal latents, and blurry late-stage frames. It includes (1) a chain-of-thought prompt enhancement module that transforms static editing instructions into temporally grounded reasoning prompts; (2) a temporal latent dropout strategy that compresses frame latents after the expert-switch point, accelerating denoising while preserving semantic and temporal coherence; and (3) a self-consistent post-refinement step that sharpens late-stage frames using a short still-video trajectory. Experiments on four public benchmarks, covering non-rigid editing, physical and temporal reasoning, and general instruction edits, show that IF-Edit performs strongly on reasoning-centric tasks while remaining competitive on general-purpose edits. Our study provides a systematic view of video diffusion models as image editors and highlights a simple recipe for unified video-image generative reasoning.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19434v1" target="_blank" rel="noopener noreferrer">
                通过合并预训练专家打破扩散模型中的似然-质量权衡
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Breaking the Likelihood-Quality Trade-off in Diffusion Models by Merging Pretrained Experts
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yasin Esfandiari, Stefan Bauer, Sebastian U. Stich, Andrea Dittadi
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注扩散模型的改进，属于生成模型领域。虽然扩散模型技术可能间接影响内容生成，但论文本身专注于模型质量与似然的权衡问题，没有明确指向推荐系统、搜索或广告的直接应用。这更接近纯粹的生成模型研究，而非排名或个性化领域的核心技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:59:53">2025-11-24 18:59:53</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19434v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19434v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span><span class="category-tag">stat.ML</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Diffusion models for image generation often exhibit a trade-off between perceptual sample quality and data likelihood: training objectives emphasizing high-noise denoising steps yield realistic images but poor likelihoods, whereas likelihood-oriented training overweights low-noise steps and harms visual fidelity. We introduce a simple plug-and-play sampling method that combines two pretrained diffusion experts by switching between them along the denoising trajectory. Specifically, we apply an image-quality expert at high noise levels to shape global structure, then switch to a likelihood expert at low noise levels to refine pixel statistics. The approach requires no retraining or fine-tuning -- only the choice of an intermediate switching step. On CIFAR-10 and ImageNet32, the merged model consistently matches or outperforms its base components, improving or preserving both likelihood and sample quality relative to each expert alone. These results demonstrate that expert switching across noise levels is an effective way to break the likelihood-quality trade-off in image diffusion models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19401v1" target="_blank" rel="noopener noreferrer">
                视频内指令：将视觉信号作为生成式控制
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            In-Video Instructions: Visual Signals as Generative Control
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Gongfan Fang, Xinyin Ma, Xinchao Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉信号作为生成控制，属于视觉-生成交叉领域，与推荐系统、搜索或广告的核心技术关联较弱。虽然标题暗示了多模态处理，但缺乏明确的RecSys/Search/Ads应用场景，且更偏向纯粹的视觉内容生成而非排名或推荐任务。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:38:45">2025-11-24 18:38:45</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19401v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19401v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large-scale video generative models have recently demonstrated strong visual capabilities, enabling the prediction of future frames that adhere to the logical and physical cues in the current observation. In this work, we investigate whether such capabilities can be harnessed for controllable image-to-video generation by interpreting visual signals embedded within the frames as instructions, a paradigm we term In-Video Instruction. In contrast to prompt-based control, which provides textual descriptions that are inherently global and coarse, In-Video Instruction encodes user guidance directly into the visual domain through elements such as overlaid text, arrows, or trajectories. This enables explicit, spatial-aware, and unambiguous correspondences between visual subjects and their intended actions by assigning distinct instructions to different objects. Extensive experiments on three state-of-the-art generators, including Veo 3.1, Kling 2.5, and Wan 2.2, show that video models can reliably interpret and execute such visually embedded instructions, particularly in complex multi-object scenarios.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19343v1" target="_blank" rel="noopener noreferrer">
                Syn-GRPO：面向多模态大语言模型感知推理的自演进数据合成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Syn-GRPO: Self-Evolving Data Synthesis for MLLM Perception Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qihan Huang, Haofei Zhang, Rong Wei, Yi Wang, Rui Tang, Mingli Song, Jie Song
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态大语言模型（MLLM）的数据合成技术，属于纯粹的LLM技术范畴，与推荐系统、搜索或广告的核心进展关联度较低。虽然数据合成技术可能间接影响模型性能，但论文标题未表明其在RecSys/Search/Ads领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:42:29">2025-11-24 17:42:29</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19343v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19343v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                RL (reinforcement learning) methods (e.g., GRPO) for MLLM (Multimodal LLM) perception ability has attracted wide research interest owing to its remarkable generalization ability. Nevertheless, existing reinforcement learning methods still face the problem of low data quality, where data samples cannot elicit diverse responses from MLLMs, thus restricting the exploration scope for MLLM reinforcement learning. Some methods attempt to mitigate this problem by imposing constraints on entropy, but none address it at its root. Therefore, to tackle this problem, this work proposes Syn-GRPO (Synthesis-GRPO), which employs an online data generator to synthesize high-quality training data with diverse responses in GRPO training. Specifically, Syn-GRPO consists of two components: (1) data server; (2) GRPO workflow. The data server synthesizes new samples from existing ones using an image generation model, featuring a decoupled and asynchronous scheme to achieve high generation efficiency. The GRPO workflow provides the data server with the new image descriptions, and it leverages a diversity reward to supervise the MLLM to predict image descriptions for synthesizing samples with diverse responses. Experiment results across three visual perception tasks demonstrate that Syn-GRPO improves the data quality by a large margin, achieving significant superior performance to existing MLLM perception methods, and Syn-GRPO presents promising potential for scaling long-term self-evolving RL. Our code is available at https://github.com/hqhQAQ/Syn-GRPO.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19339v1" target="_blank" rel="noopener noreferrer">
                POUR：一种通过神经坍缩实现表示遗忘的可证明最优方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            POUR: A Provably Optimal Method for Unlearning Representations via Neural Collapse
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Anjie Le, Can Peng, Yuyuan Liu, J. Alison Noble
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注表示遗忘和神经坍缩，属于机器学习安全性和隐私保护范畴，这些主题被明确列为不相关领域。虽然表示学习在推荐系统中具有基础作用，但该工作的核心焦点是遗忘机制而非表示学习本身，缺乏与推荐、搜索或广告系统的直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:38:53">2025-11-24 17:38:53</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19339v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19339v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In computer vision, machine unlearning aims to remove the influence of specific visual concepts or training images without retraining from scratch. Studies show that existing approaches often modify the classifier while leaving internal representations intact, resulting in incomplete forgetting. In this work, we extend the notion of unlearning to the representation level, deriving a three-term interplay between forgetting efficacy, retention fidelity, and class separation. Building on Neural Collapse theory, we show that the orthogonal projection of a simplex Equiangular Tight Frame (ETF) remains an ETF in a lower dimensional space, yielding a provably optimal forgetting operator. We further introduce the Representation Unlearning Score (RUS) to quantify representation-level forgetting and retention fidelity. Building on this, we introduce POUR (Provably Optimal Unlearning of Representations), a geometric projection method with closed-form (POUR-P) and a feature-level unlearning variant under a distillation scheme (POUR-D). Experiments on CIFAR-10/100 and PathMNIST demonstrate that POUR achieves effective unlearning while preserving retained knowledge, outperforming state-of-the-art unlearning methods on both classification-level and representation-level metrics.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19306v1" target="_blank" rel="noopener noreferrer">
                面向语言引导红外小目标检测的双粒度语义提示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Dual-Granularity Semantic Prompting for Language Guidance Infrared Small Target Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zixuan Wang, Haoran Sun, Jiaming Lu, Wenxuan Wang, Zhongling Huang, Dingwen Zhan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注红外小目标检测这一计算机视觉任务，虽然涉及语言引导，但其核心应用领域是红外成像和目标检测，与推荐系统、搜索或广告没有直接关联。红外小目标检测主要应用于军事、安防、遥感等视觉领域，缺乏在RecSys/Search/Ads中的潜在应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:58:23">2025-11-24 16:58:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19306v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19306v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Infrared small target detection remains challenging due to limited feature representation and severe background interference, resulting in sub-optimal performance. While recent CLIP-inspired methods attempt to leverage textual guidance for detection, they are hindered by inaccurate text descriptions and reliance on manual annotations. To overcome these limitations, we propose DGSPNet, an end-to-end language prompt-driven framework. Our approach integrates dual-granularity semantic prompts: coarse-grained textual priors (e.g., 'infrared image', 'small target') and fine-grained personalized semantic descriptions derived through visual-to-textual mapping within the image space. This design not only facilitates learning fine-grained semantic information but also can inherently leverage language prompts during inference without relying on any annotation requirements. By fully leveraging the precision and conciseness of text descriptions, we further introduce a text-guide channel attention (TGCA) mechanism and text-guide spatial attention (TGSA) mechanism that enhances the model's sensitivity to potential targets across both low- and high-level feature spaces. Extensive experiments demonstrate that our method significantly improves detection accuracy and achieves state-of-the-art performance on three benchmark datasets.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Paper card rendered from normal_paper_template.html: title, score badge, authors, recommendation reason, arXiv metadata and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19274v1" target="_blank" rel="noopener noreferrer">
                基于扩散重建的核心集选择数据似然估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page stylesheet while the card carries the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- English title: lang="en" lets assistive technology switch pronunciation inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Diffusion Reconstruction-based Data Likelihood Estimation for Core-Set Selection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mingyang Chen, Jiawei Du, Bo Huang, Yi Wang, Xiaobo Zhang, Wei Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注核心集选择和扩散模型的数据似然估计，属于数据选择/采样技术领域。虽然核心集选择在理论上可以应用于推荐/搜索系统的训练数据优化，但论文标题没有明确指向推荐系统、搜索或广告的具体应用场景，与当前关注的LLM技术、Transformer架构或直接应用关联度较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:25:34">2025-11-24 16:25:34</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19274v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19274v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Existing core-set selection methods predominantly rely on heuristic scoring signals such as training dynamics or model uncertainty, lacking explicit modeling of data likelihood. This omission may hinder the constructed subset from capturing subtle yet critical distributional structures that underpin effective model training. In this work, we propose a novel, theoretically grounded approach that leverages diffusion models to estimate data likelihood via reconstruction deviation induced by partial reverse denoising. Specifically, we establish a formal connection between reconstruction error and data likelihood, grounded in the Evidence Lower Bound (ELBO) of Markovian diffusion processes, thereby enabling a principled, distribution-aware scoring criterion for data selection. Complementarily, we introduce an efficient information-theoretic method to identify the optimal reconstruction timestep, ensuring that the deviation provides a reliable signal indicative of underlying data likelihood. Extensive experiments on ImageNet demonstrate that reconstruction deviation offers an effective scoring criterion, consistently outperforming existing baselines across selection ratios, and closely matching full-data training using only 50% of the data. Further analysis shows that the likelihood-informed nature of our score reveals informative insights in data selection, shedding light on the interplay between data distributional characteristics and model learning preferences.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19235v1" target="_blank" rel="noopener noreferrer">
                IDSplat：面向驾驶场景的实例分解3D高斯泼溅技术
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            IDSplat: Instance-Decomposed 3D Gaussian Splatting for Driving Scenes
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Carl Lindström, Mahan Rafidashti, Maryam Fatemi, Lars Hammarstrand, Martin R. Os...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D场景重建和计算机视觉技术，属于纯粹的视觉领域研究。虽然涉及驾驶场景，但缺乏与推荐系统、搜索或广告的直接关联，也没有展示如何将3D高斯泼溅技术应用于这些领域的潜力。这更像是自动驾驶或机器人领域的视觉技术，而非推荐/搜索/广告相关的核心进展。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 15:48:08">2025-11-24 15:48:08</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19235v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19235v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Reconstructing dynamic driving scenes is essential for developing autonomous systems through sensor-realistic simulation. Although recent methods achieve high-fidelity reconstructions, they either rely on costly human annotations for object trajectories or use time-varying representations without explicit object-level decomposition, leading to intertwined static and dynamic elements that hinder scene separation. We present IDSplat, a self-supervised 3D Gaussian Splatting framework that reconstructs dynamic scenes with explicit instance decomposition and learnable motion trajectories, without requiring human annotations. Our key insight is to model dynamic objects as coherent instances undergoing rigid transformations, rather than unstructured time-varying primitives. For instance decomposition, we employ zero-shot, language-grounded video tracking anchored to 3D using lidar, and estimate consistent poses via feature correspondences. We introduce a coordinated-turn smoothing scheme to obtain temporally and physically consistent motion trajectories, mitigating pose misalignments and tracking failures, followed by joint optimization of object poses and Gaussian parameters. Experiments on the Waymo Open Dataset demonstrate that our method achieves competitive reconstruction quality while maintaining instance-level decomposition and generalizes across diverse sequences and view densities without retraining, making it practical for large-scale autonomous driving applications. Code will be released.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19217v1" target="_blank" rel="noopener noreferrer">
                ReAlign：通过步骤感知奖励引导对齐的文本到运动生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ReAlign: Text-to-Motion Generation via Step-Aware Reward-Guided Alignment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wanjiang Weng, Xiaofeng Tan, Junbo Wang, Guo-Sen Xie, Pan Zhou, Hongsong Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到运动生成，属于计算机视觉和动作生成领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然提到了对齐和奖励引导技术，但这些方法在推荐/搜索/广告中的潜在应用场景不明确，主要服务于内容生成而非排名或个性化任务。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 15:23:36">2025-11-24 15:23:36</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19217v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19217v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text-to-motion generation, which synthesizes 3D human motions from text inputs, holds immense potential for applications in gaming, film, and robotics. Recently, diffusion-based methods have been shown to generate more diversity and realistic motion. However, there exists a misalignment between text and motion distributions in diffusion models, which leads to semantically inconsistent or low-quality motions. To address this limitation, we propose Reward-guided sampling Alignment (ReAlign), comprising a step-aware reward model to assess alignment quality during the denoising sampling and a reward-guided strategy that directs the diffusion process toward an optimally aligned distribution. This reward model integrates step-aware tokens and combines a text-aligned module for semantic consistency and a motion-aligned module for realism, refining noisy motions at each timestep to balance probability density and alignment. Extensive experiments of both motion generation and retrieval tasks demonstrate that our approach significantly improves text-motion alignment and motion quality compared to existing state-of-the-art methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19200v1" target="_blank" rel="noopener noreferrer">
                现代视觉模型能否理解物体与其外观相似物之间的差异？
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Can Modern Vision Models Understand the Difference Between an Object and a Look-alike?
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Itay Cohen, Ethan Fetaya, Amir Rosenfeld
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉模型的物体识别能力，属于纯粹的计算机视觉研究。虽然视觉模型在商品识别等场景中有潜在应用，但论文标题未表明与推荐系统、搜索或广告的直接关联，也未涉及多模态学习或Transformer架构的改进。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 15:09:32">2025-11-24 15:09:32</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19200v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19200v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent advances in computer vision have yielded models with strong performance on recognition benchmarks; however, significant gaps remain in comparison to human perception. One subtle ability is to judge whether an image looks like a given object without being an instance of that object. We study whether vision-language models such as CLIP capture this distinction. We curated a dataset named RoLA (Real or Lookalike) of real and lookalike exemplars (e.g., toys, statues, drawings, pareidolia) across multiple categories, and first evaluate a prompt-based baseline with paired "real"/"lookalike" prompts. We then estimate a direction in CLIP's embedding space that moves representations between real and lookalike. Applying this direction to image and text embeddings improves discrimination in cross-modal retrieval on Conceptual12M, and also enhances captions produced by a CLIP prefix captioner.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19199v1" target="_blank" rel="noopener noreferrer">
                CLASH：跨模态矛盾检测基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CLASH: A Benchmark for Cross-Modal Contradiction Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Teodora Popordanoska, Jiameng Li, Matthew B. Blaschko
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注跨模态矛盾检测的基准构建，这属于评估基准范畴，与我的关注点中排除的'评估基准'直接相关。虽然跨模态学习在概念上与VLM对异构数据的处理有相似性，但该工作侧重于检测矛盾而非统一建模，且作为基准研究缺乏明确的推荐/搜索/广告应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 15:09:07">2025-11-24 15:09:07</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19199v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19199v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Contradictory multimodal inputs are common in real-world settings, yet existing benchmarks typically assume input consistency and fail to evaluate cross-modal contradiction detection - a fundamental capability for preventing hallucinations and ensuring reliability. We introduce CLASH, a novel benchmark for multimodal contradiction detection, featuring COCO images paired with contradictory captions containing controlled object-level or attribute-level contradictions. The samples include targeted questions evaluated in both multiple-choice and open-ended formats. The benchmark provides an extensive fine-tuning set filtered through automated quality checks, alongside a smaller human-verified diagnostic set. Our analysis of state-of-the-art models reveals substantial limitations in recognizing cross-modal conflicts, exposing systematic modality biases and category-specific weaknesses. Furthermore, we empirically demonstrate that targeted fine-tuning on CLASH substantially enhances conflict detection capabilities.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19169v1" target="_blank" rel="noopener noreferrer">
                图像复原的测试时偏好优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Test-Time Preference Optimization for Image Restoration
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Bingchen Li, Xin Li, Jiaqi Xu, Jiaming Guo, Wenbo Li, Renjing Pei, Zhibo Chen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像复原领域的测试时优化技术，属于计算机视觉应用范畴。虽然偏好优化概念在推荐系统中具有相关性，但本文明确针对图像处理任务，与搜索、推荐或广告系统的核心排名和建模问题缺乏直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 14:32:27">2025-11-24 14:32:27</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19169v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19169v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Image restoration (IR) models are typically trained to recover high-quality images using L1 or LPIPS loss. To handle diverse unknown degradations, zero-shot IR methods have also been introduced. However, existing pre-trained and zero-shot IR approaches often fail to align with human preferences, resulting in restored images that may not be favored. This highlights the critical need to enhance restoration quality and adapt flexibly to various image restoration tasks or backbones without requiring model retraining and ideally without labor-intensive preference data collection. In this paper, we propose the first Test-Time Preference Optimization (TTPO) paradigm for image restoration, which enhances perceptual quality, generates preference data on-the-fly, and is compatible with any IR model backbone. Specifically, we design a training-free, three-stage pipeline: (i) generate candidate preference images online using diffusion inversion and denoising based on the initially restored image; (ii) select preferred and dispreferred images using automated preference-aligned metrics or human feedback; and (iii) use the selected preference images as reward signals to guide the diffusion denoising process, optimizing the restored image to better align with human preferences. Extensive experiments across various image restoration tasks and models demonstrate the effectiveness and flexibility of the proposed pipeline.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19126v1" target="_blank" rel="noopener noreferrer">
                当语义调控时：重新思考用于CLIP生成图像检测的补丁洗牌与内部偏差
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            When Semantics Regulate: Rethinking Patch Shuffle and Internal Bias for Generated Image Detection with CLIP
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Beilin Chu, Weike You, Mengtao Li, Tingting Zheng, Kehan Zhao, Xuan Xu, Zhigao L...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注生成图像检测，这是一个计算机视觉任务，与推荐系统、搜索或广告的核心技术没有直接关联。虽然CLIP技术本身具有多模态能力，但论文聚焦于图像伪造检测这一特定应用场景，缺乏明确的RecSys/Search/Ads应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 13:54:00">2025-11-24 13:54:00</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19126v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19126v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid progress of GANs and Diffusion Models poses new challenges for detecting AI-generated images. Although CLIP-based detectors exhibit promising generalization, they often rely on semantic cues rather than generator artifacts, leading to brittle performance under distribution shifts. In this work, we revisit the nature of semantic bias and uncover that Patch Shuffle provides an unusually strong benefit for CLIP, that disrupts global semantic continuity while preserving local artifact cues, which reduces semantic entropy and homogenizes feature distributions between natural and synthetic images. Through a detailed layer-wise analysis, we further show that CLIP's deep semantic structure functions as a regulator that stabilizes cross-domain representations once semantic bias is suppressed. Guided by these findings, we propose SemAnti, a semantic-antagonistic fine-tuning paradigm that freezes the semantic subspace and adapts only artifact-sensitive layers under shuffled semantics. Despite its simplicity, SemAnti achieves state-of-the-art cross-domain generalization on AIGCDetectBenchmark and GenImage, demonstrating that regulating semantics is key to unlocking CLIP's full potential for robust AI-generated image detection.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19119v1" target="_blank" rel="noopener noreferrer">
                MonoSR：基于单目图像的开放词汇空间推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MonoSR: Open-Vocabulary Spatial Reasoning from Monocular Images
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qirui Wang, Jingyi He, Yining Pan, Si Yong Yeo, Xulei Yang, Shijie Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的单目图像空间推理，属于纯粹的视觉任务，与推荐系统、搜索或广告没有直接关联。虽然标题中提到'开放词汇'可能涉及语言理解，但核心焦点是空间推理这一视觉能力，无法看出在RecSys/Search/Ads中的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 13:49:17">2025-11-24 13:49:17</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19119v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19119v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Spatial reasoning (SR), the ability to infer 3D spatial information from 2D inputs, is essential for real-world applications such as embodied AI and autonomous driving. However, existing research primarily focuses on indoor environments and typically relies on multi-view observations, which limits their generalizability to outdoor scenarios and constrains their applicability to monocular images, the most common real-world setting. In this work, we propose MonoSR, a large-scale monocular spatial reasoning dataset that spans diverse scenarios including indoor, outdoor, and object-centric settings, and supports multiple question types. MonoSR provides a path toward open-world monocular spatial reasoning. Beyond introducing the dataset, we evaluate advanced vision-language models to reveal their limitations on this challenging task. We further analyze whether auxiliary information is crucial for monocular spatial reasoning and offer practical guidance for designing future models. These contributions collectively establish a foundation for advancing monocular spatial reasoning in real-world, open-world environments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. While it carries collapsed-level-1, the page's inline stylesheet hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19080v1" target="_blank" rel="noopener noreferrer">
                基于伪造感知音视频自适应的可泛化深度伪造检测：一种变分贝叶斯方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch away from the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Towards Generalizable Deepfake Detection via Forgery-aware Audio-Visual Adaptation: A Variational Bayesian Approach
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fan Nie, Jiangqun Ni, Jian Zhang, Bin Zhang, Weizhe Zhang, Bin Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注深度伪造检测，属于计算机视觉和多媒体安全领域，与推荐系统、搜索或广告的核心技术无直接关联。虽然变分贝叶斯方法在理论上可能应用于其他领域，但论文的焦点是伪造检测而非推荐或搜索场景中的异构数据处理。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24 13:20:03">2025-11-24 13:20:03</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19080v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19080v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.MM</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The widespread application of AIGC contents has brought not only unprecedented opportunities, but also potential security concerns, e.g., audio-visual deepfakes. Therefore, it is of great importance to develop an effective and generalizable method for multi-modal deepfake detection. Typically, the audio-visual correlation learning could expose subtle cross-modal inconsistencies, e.g., audio-visual misalignment, which serve as crucial clues in deepfake detection. In this paper, we reformulate the correlation learning with variational Bayesian estimation, where audio-visual correlation is approximated as a Gaussian distributed latent variable, and thus develop a novel framework for deepfake detection, i.e., Forgery-aware Audio-Visual Adaptation with Variational Bayes (FoVB). Specifically, given the prior knowledge of pre-trained backbones, we adopt two core designs to estimate audio-visual correlations effectively. First, we exploit various difference convolutions and a high-pass filter to discern local and global forgery traces from both modalities. Second, with the extracted forgery-aware features, we estimate the latent Gaussian variable of audio-visual correlation via variational Bayes. Then, we factorize the variable into modality-specific and correlation-specific ones with orthogonality constraint, allowing them to better learn intra-modal and cross-modal forgery traces with less entanglement. Extensive experiments demonstrate that our FoVB outperforms other state-of-the-art methods in various benchmarks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19067v1" target="_blank" rel="noopener noreferrer">
                DynaMix：通过动态重标注与混合数据采样的可泛化行人重识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DynaMix: Generalizable Person Re-identification via Dynamic Relabeling and Mixed Data Sampling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Timur Mamedov, Anton Konushin, Vadim Konushin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域中的行人重识别任务，主要涉及图像识别和跨摄像头追踪技术。虽然论文提到了泛化性和数据采样方法，但这些技术主要针对视觉识别场景，与推荐系统、搜索或广告中的用户行为建模、内容排序等核心问题缺乏直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T13:01:32">2025-11-24 13:01:32</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19067v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19067v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Generalizable person re-identification (Re-ID) aims to recognize individuals across unseen cameras and environments. While existing methods rely heavily on limited labeled multi-camera data, we propose DynaMix, a novel method that effectively combines manually labeled multi-camera and large-scale pseudo-labeled single-camera data. Unlike prior works, DynaMix dynamically adapts to the structure and noise of the training data through three core components: (1) a Relabeling Module that refines pseudo-labels of single-camera identities on-the-fly; (2) an Efficient Centroids Module that maintains robust identity representations under a large identity space; and (3) a Data Sampling Module that carefully composes mixed data mini-batches to balance learning complexity and intra-batch diversity. All components are specifically designed to operate efficiently at scale, enabling effective training on millions of images and hundreds of thousands of identities. Extensive experiments demonstrate that DynaMix consistently outperforms state-of-the-art methods in generalizable person Re-ID.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19062v1" target="_blank" rel="noopener noreferrer">
                粒度计算驱动的SAM：从粗到细的引导实现无提示分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Granular Computing-driven SAM: From Coarse-to-Fine Guidance for Prompt-Free Segmentation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qiyang Yu, Yu Fang, Tianrui Li, Xuemei Cao, Yan Chen, Jianghao Li, Fan Min, Yi Z...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的图像分割技术，特别是基于SAM（Segment Anything Model）的改进方法。虽然SAM本身是基础模型，但该工作专注于纯视觉任务的分割改进，没有明显涉及推荐系统、搜索或广告领域的潜在应用。粒度计算和从粗到细的分割方法主要针对视觉理解任务，与文本、序列或异构数据处理无关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T12:55:02">2025-11-24 12:55:02</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19062v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19062v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Prompt-free image segmentation aims to generate accurate masks without manual guidance. Typical pre-trained models, notably Segmentation Anything Model (SAM), generate prompts directly at a single granularity level. However, this approach has two limitations: (1) Localizability, lacking mechanisms for autonomous region localization; (2) Scalability, limited fine-grained modeling at high resolution. To address these challenges, we introduce Granular Computing-driven SAM (Grc-SAM), a coarse-to-fine framework motivated by Granular Computing (GrC). First, the coarse stage adaptively extracts high-response regions from features to achieve precise foreground localization and reduce reliance on external prompts. Second, the fine stage applies finer patch partitioning with sparse local swin-style attention to enhance detail modeling and enable high-resolution segmentation. Third, refined masks are encoded as latent prompt embeddings for the SAM decoder, replacing handcrafted prompts with an automated reasoning process. By integrating multi-granularity attention, Grc-SAM bridges granular computing with vision transformers. Extensive experimental results demonstrate Grc-SAM outperforms baseline methods in both accuracy and scalability. It offers a unique granular computational perspective for prompt-free segmentation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19049v1" target="_blank" rel="noopener noreferrer">
                超越奖励边际：通过视频生成重新思考并解决扩散模型中的似然偏移问题
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Beyond Reward Margin: Rethinking and Resolving Likelihood Displacement in Diffusion Models via Video Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ruojun Xu, Yu Kai, Xuhua Ren, Jiaxiang Cheng, Bing Ma, Tianxiang Zheng, Qinhlin ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于扩散模型在视频生成中的技术改进，属于纯粹的生成式AI领域。虽然扩散模型是重要的生成技术，但论文内容主要解决视频生成中的似然偏移问题，与推荐系统、搜索或广告的排名和匹配任务没有直接关联，也没有展示在异构数据处理方面的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T12:37:49">2025-11-24 12:37:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19049v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19049v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Direct Preference Optimization (DPO) has shown promising results in aligning generative outputs with human preferences by distinguishing between chosen and rejected samples. However, a critical limitation of DPO is likelihood displacement, where the probabilities of chosen samples paradoxically decrease during training, undermining the quality of generation. Although this issue has been investigated in autoregressive models, its impact within diffusion-based models remains largely unexplored. This gap leads to suboptimal performance in tasks involving video generation. To address this, we conduct a formal analysis of DPO loss through updating policy within the diffusion framework, which describes how the updating of specific training samples influences the model's predictions on other samples. Using this tool, we identify two main failure modes: (1) Optimization Conflict, which arises from small reward margins between chosen and rejected samples, and (2) Suboptimal Maximization, caused by large reward margins. Informed by these insights, we introduce a novel solution named Policy-Guided DPO (PG-DPO), combining Adaptive Rejection Scaling (ARS) and Implicit Preference Regularization (IPR) to effectively mitigate likelihood displacement. Experiments show that PG-DPO outperforms existing methods in both quantitative metrics and qualitative evaluations, offering a robust solution for improving preference alignment in video generation tasks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19032v1" target="_blank" rel="noopener noreferrer">
                基准测试LVLM的损坏鲁棒性：一个判别性基准与鲁棒性对齐度量
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Benchmarking Corruption Robustness of LVLMs: A Discriminative Benchmark and Robustness Alignment Metric
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiangjie Sui, Songyang Li, Hanwei Zhu, Baoliang Chen, Yuming Fang, Xin Sun
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于大型视觉语言模型（LVLM）的鲁棒性基准测试和评估，属于纯粹的评估基准研究。虽然LVLM与视觉语言模型相关，但论文的核心是损坏鲁棒性测试和评估指标开发，这属于评估基准范畴，与您关注的推荐系统、搜索、广告中的核心进展、使能技术或直接应用无关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T12:07:56">2025-11-24 12:07:56</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19032v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19032v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Despite the remarkable reasoning abilities of large vision-language models (LVLMs), their robustness under visual corruptions remains insufficiently studied. Existing evaluation paradigms exhibit two major limitations: 1) the dominance of low-discriminative samples in current datasets masks the real robustness gap between models; and 2) conventional accuracy-based metric fail to capture the degradation of the underlying prediction structure. To bridge these gaps, we introduce Bench-C, a comprehensive benchmark emphasizing discriminative samples for assessing corruption robustness, where a selection strategy is proposed to jointly consider the prediction inconsistency under corruption and the semantic diversity. Furthermore, we propose the Robustness Alignment Score (RAS), a unified metric that measures degradation in logit-level prediction structure by considering the shifts in prediction uncertainty and calibration alignment. Comprehensive experiments and analysis reveal several interesting findings: 1) model behaviors exhibit distinguish patterns under corruptions, such as erroneous confidence and hesitation; 2) despite subtle corruption may lead to a slight accuracy gain, the overall prediction structure still degrades; 3) by decomposing corruption robustness into destructive and corrective components, the distinct failure and recovery patterns across models can be revealed.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19024v1" target="_blank" rel="noopener noreferrer">
                Life-IQA：通过图卷积网络增强的层间交互与基于专家混合的特征解耦提升盲图像质量评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Life-IQA: Boosting Blind Image Quality Assessment through GCN-enhanced Layer Interaction and MoE-based Feature Decoupling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Long Tang, Guoquan Zhen, Jie Hao, Jianbo Zhang, Huiyu Duan, Liang Yuan, Guangtao...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像质量评估，虽然采用了MoE（专家混合）架构，但其核心应用场景是图像处理而非推荐系统、搜索或广告。论文的技术方法（GCN层交互、特征解耦）与RecSys/Search/Ads的核心排名和用户建模问题缺乏直接关联，潜在应用场景不明确。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:59:55">2025-11-24 11:59:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19024v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19024v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- LaTeX markup from the source abstract (underline, bold, href) is rendered as HTML so it does not show as raw commands. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Blind image quality assessment (BIQA) plays a crucial role in evaluating and optimizing visual experience. Most existing BIQA approaches fuse shallow and deep features extracted from backbone networks, while overlooking the unequal contributions to quality prediction. Moreover, while various vision encoder backbones are widely adopted in BIQA, the effective quality decoding architectures remain underexplored. To address these limitations, this paper investigates the contributions of shallow and deep features to BIQA, and proposes a effective quality feature decoding framework via GCN-enhanced <u>l</u>ayer <u>i</u>nteraction and MoE-based <u>f</u>eature d<u>e</u>coupling, termed <b>(Life-IQA)</b>. Specifically, the GCN-enhanced layer interaction module utilizes the GCN-enhanced deepest-layer features as query and the penultimate-layer features as key, value, then performs cross-attention to achieve feature interaction. Moreover, a MoE-based feature decoupling module is proposed to decouple fused representations though different experts specialized for specific distortion types or quality dimensions. Extensive experiments demonstrate that Life-IQA shows more favorable balance between accuracy and cost than a vanilla Transformer decoder and achieves state-of-the-art performance on multiple BIQA benchmarks. The code is available at: <a href="https://github.com/TANGLONG2/Life-IQA/tree/main" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">Life-IQA</a>.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19021v1" target="_blank" rel="noopener noreferrer">
                动态粒度至关重要：超越固定补丁分割的视觉Transformer再思考
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Dynamic Granularity Matters: Rethinking Vision Transformers Beyond Fixed Patch Splitting
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qiyang Yu, Yu Fang, Tianrui Li, Xuemei Cao, Yan Chen, Jianghao Li, Fan Min
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉Transformer中补丁分割的改进，属于纯粹的计算机视觉架构优化。虽然Transformer架构本身与推荐/搜索系统相关，但该工作专注于视觉特定的补丁处理，没有明确展示在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:55:22">2025-11-24 11:55:22</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19021v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19021v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision Transformers (ViTs) have demonstrated strong capabilities in capturing global dependencies but often struggle to efficiently represent fine-grained local details. Existing multi-scale approaches alleviate this issue by integrating hierarchical or hybrid features; however, they rely on fixed patch sizes and introduce redundant computation. To address these limitations, we propose Granularity-driven Vision Transformer (Grc-ViT), a dynamic coarse-to-fine framework that adaptively adjusts visual granularity based on image complexity. It comprises two key stages: (1) Coarse Granularity Evaluation module, which assesses visual complexity using edge density, entropy, and frequency-domain cues to estimate suitable patch and window sizes; (2) Fine-grained Refinement module, which refines attention computation according to the selected granularity, enabling efficient and precise feature learning. Two learnable parameters, α and β, are optimized end-to-end to balance global reasoning and local perception. Comprehensive evaluations demonstrate that Grc-ViT enhances fine-grained discrimination while achieving a superior trade-off between accuracy and computational efficiency.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19004v1" target="_blank" rel="noopener noreferrer">
                一种用于真实文本到LiDAR场景生成的自条件表示引导扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row and abstract. -->
    <div class="paper-details">
        <!-- English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-LiDAR Scene Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wentao Qu, Guofeng Mei, Yang Wu, Yongshun Gong, Xiaoshui Huang, Liang Xiao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到LiDAR场景生成，属于计算机视觉和3D感知领域，与推荐系统、搜索或广告的核心技术关联度极低。虽然扩散模型是生成式AI的重要技术，但LiDAR场景生成在RecSys/Search/Ads领域缺乏明确的应用场景，主要适用于自动驾驶、机器人等物理世界感知任务。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:32:15">2025-11-24 11:32:15</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19004v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19004v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text-to-LiDAR generation can customize 3D data with rich structures and diverse scenes for downstream tasks. However, the scarcity of Text-LiDAR pairs often causes insufficient training priors, generating overly smooth 3D scenes. Moreover, low-quality text descriptions may degrade generation quality and controllability. In this paper, we propose a Text-to-LiDAR Diffusion Model for scene generation, named T2LDM, with a Self-Conditioned Representation Guidance (SCRG). Specifically, SCRG, by aligning to the real representations, provides the soft supervision with reconstruction details for the Denoising Network (DN) in training, while decoupled in inference. In this way, T2LDM can perceive rich geometric structures from data distribution, generating detailed objects in scenes. Meanwhile, we construct a content-composable Text-LiDAR benchmark, T2nuScenes, along with a controllability metric. Based on this, we analyze the effects of different text prompts for LiDAR generation quality and controllability, providing practical prompt paradigms and insights. Furthermore, a directional position prior is designed to mitigate street distortion, further improving scene fidelity. Additionally, by learning a conditional encoder via frozen DN, T2LDM can support multiple conditional tasks, including Sparse-to-Dense, Dense-to-Sparse, and Semantic-to-LiDAR generation. Extensive experiments in unconditional and conditional generation demonstrate that T2LDM outperforms existing methods, achieving state-of-the-art scene generation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Title row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18991v1" target="_blank" rel="noopener noreferrer">
                用于三维一致性视频生成的视图一致性扩散表示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- The star glyph is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Original English title, tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            View-Consistent Diffusion Representations for 3D-Consistent Video Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Duolikun Danier, Ge Gao, Steven McDonagh, Changjian Li, Hakan Bilen, Oisin Mac A...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注3D一致的视频生成，属于计算机视觉和生成模型的范畴。虽然扩散模型是LLM相关技术，但该工作专注于3D视觉和视频生成，与推荐系统、搜索或广告的核心技术没有直接关联。在推荐/搜索/广告领域的潜在应用非常有限，可能仅涉及广告创意生成，但根据指导原则这属于不相关主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T11:16:55">2025-11-24 11:16:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18991v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18991v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Video generation models have made significant progress in generating realistic content, enabling applications in simulation, gaming, and film making. However, current generated videos still contain visual artifacts arising from 3D inconsistencies, e.g., objects and structures deforming under changes in camera pose, which can undermine user experience and simulation fidelity. Motivated by recent findings on representation alignment for diffusion models, we hypothesize that improving the multi-view consistency of video diffusion representations will yield more 3D-consistent video generation. Through detailed analysis on multiple recent camera-controlled video diffusion models we reveal strong correlations between 3D-consistent representations and videos. We also propose ViCoDR, a new approach for improving the 3D consistency of video models by learning multi-view consistent diffusion representations. We evaluate ViCoDR on camera controlled image-to-video, text-to-video, and multi-view generation models, demonstrating significant improvements in the 3D consistency of the generated videos. Project page: https://danier97.github.io/ViCoDR.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18983v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18983v1" target="_blank" rel="noopener noreferrer">
                UMCL：基于单模态生成的多模态对比学习用于跨压缩率深度伪造检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UMCL: Unimodal-generated Multimodal Contrastive Learning for Cross-compression-rate Deepfake Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ching-Yi Lai, Chih-Yu Jian, Pei-Cheng Chuang, Chia-Ming Lee, Chih-Chung Hsu, Chi...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注深度伪造检测和多媒体安全领域，这与推荐系统、搜索或广告的核心技术焦点无关。虽然提到了多模态对比学习技术，但其应用场景（跨压缩率深度伪造检测）在推荐、搜索或广告领域没有直接的相关性或潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T10:56:22">2025-11-24 10:56:22</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18983v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18983v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In deepfake detection, the varying degrees of compression employed by social media platforms pose significant challenges for model generalization and reliability. Although existing methods have progressed from single-modal to multimodal approaches, they face critical limitations: single-modal methods struggle with feature degradation under data compression in social media streaming, while multimodal approaches require expensive data collection and labeling and suffer from inconsistent modal quality or accessibility in real-world scenarios. To address these challenges, we propose a novel Unimodal-generated Multimodal Contrastive Learning (UMCL) framework for robust cross-compression-rate (CCR) deepfake detection. In the training stage, our approach transforms a single visual modality into three complementary features: compression-robust rPPG signals, temporal landmark dynamics, and semantic embeddings from pre-trained vision-language models. These features are explicitly aligned through an affinity-driven semantic alignment (ASA) strategy, which models inter-modal relationships through affinity matrices and optimizes their consistency through contrastive learning. Subsequently, our cross-quality similarity learning (CQSL) strategy enhances feature robustness across compression rates. Extensive experiments demonstrate that our method achieves superior performance across various compression rates and manipulation types, establishing a new benchmark for robust deepfake detection. Notably, our approach maintains high detection accuracy even when individual features degrade, while providing interpretable insights into feature relationships through explicit alignment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18957v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18957v1" target="_blank" rel="noopener noreferrer">
                Eevee：面向近距离高分辨率视频虚拟试穿
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Eevee: Towards Close-up High-resolution Video-based Virtual Try-on
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jianhao Zeng, Yancheng Bai, Ruidong Chen, Xuanpu Zhang, Lei Sun, Dongyang Jin, R...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频虚拟试穿技术，属于计算机视觉和图形学领域，与推荐系统、搜索或广告的核心技术栈没有直接关联。虽然虚拟试穿在电商场景中有潜在应用，但论文本身主要解决视觉生成和视频处理问题，不属于核心推荐/搜索算法或LLM技术范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T10:19:56">2025-11-24 10:19:56</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18957v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18957v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Video virtual try-on technology provides a cost-effective solution for creating marketing videos in fashion e-commerce. However, its practical adoption is hindered by two critical limitations. First, the reliance on a single garment image as input in current virtual try-on datasets limits the accurate capture of realistic texture details. Second, most existing methods focus solely on generating full-shot virtual try-on videos, neglecting the business's demand for videos that also provide detailed close-ups. To address these challenges, we introduce a high-resolution dataset for video-based virtual try-on. This dataset offers two key features. First, it provides more detailed information on the garments, which includes high-fidelity images with detailed close-ups and textual descriptions; Second, it uniquely includes full-shot and close-up try-on videos of real human models. Furthermore, accurately assessing consistency becomes significantly more critical for the close-up videos, which demand high-fidelity preservation of garment details. To facilitate such fine-grained evaluation, we propose a new garment consistency metric VGID (Video Garment Inception Distance) that quantifies the preservation of both texture and structure. Our experiments validate these contributions. We demonstrate that by utilizing the detailed images from our dataset, existing video generation models can extract and incorporate texture features, significantly enhancing the realism and detail fidelity of virtual try-on results. Furthermore, we conduct a comprehensive benchmark of recent models. The benchmark effectively identifies the texture and structural preservation problems among current methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18950v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18950v1" target="_blank" rel="noopener noreferrer">
                Compressor-VLA：面向高效机器人操作的指令引导视觉令牌压缩
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Compressor-VLA: Instruction-Guided Visual Token Compression for Efficient Robotic Manipulation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Juntao Gao, Feiyang Ye, Jing Zhang, Wenjing Qian
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注机器人操作领域的视觉令牌压缩技术，属于计算机视觉和机器人学的交叉领域。虽然提到了指令引导和令牌压缩等概念，但其应用场景明确限定在机器人操作，与推荐系统、搜索或广告领域缺乏直接关联，且未展示这些技术在其他领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T10:06:41">2025-11-24 10:06:41</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18950v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18950v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language-Action (VLA) models have emerged as a powerful paradigm in Embodied AI. However, the significant computational overhead of processing redundant visual tokens remains a critical bottleneck for real-time robotic deployment. While standard token pruning techniques can alleviate this, these task-agnostic methods struggle to preserve task-critical visual information. To address this challenge, simultaneously preserving both the holistic context and fine-grained details for precise action, we propose Compressor-VLA, a novel hybrid instruction-conditioned token compression framework designed for efficient, task-oriented compression of visual information in VLA models. The proposed Compressor-VLA framework consists of two token compression modules: a Semantic Task Compressor (STC) that distills holistic, task-relevant context, and a Spatial Refinement Compressor (SRC) that preserves fine-grained spatial details. This compression is dynamically modulated by the natural language instruction, allowing for the adaptive condensation of task-relevant visual information. Experimentally, extensive evaluations demonstrate that Compressor-VLA achieves a competitive success rate on the LIBERO benchmark while reducing FLOPs by 59% and the visual token count by over 3x compared to its baseline. The real-robot deployments on a dual-arm robot platform validate the model's sim-to-real transferability and practical applicability. Moreover, qualitative analyses reveal that our instruction guidance dynamically steers the model's perceptual focus toward task-relevant objects, thereby validating the effectiveness of our approach.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18942v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18942v1" target="_blank" rel="noopener noreferrer">
                VeCoR - 用于流匹配的速度对比正则化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            VeCoR - Velocity Contrastive Regularization for Flow Matching
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zong-Wei Hong, Jing-lun Li, Lin-Ze Li, Shen Zhang, Yao Tang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种用于流匹配的正则化方法，属于生成模型的技术改进。虽然流匹配在图像生成中有应用，但该技术本身与推荐系统、搜索或广告的核心排名任务关联较弱。除非流匹配被专门应用于用户行为序列建模或异构数据生成，否则对当前关注领域的直接相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:48:38">2025-11-24 09:48:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18942v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18942v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract.
             Raw LaTeX from the arXiv source (bold macro, times sign, escaped percent signs)
             is converted to HTML/Unicode so it no longer shows up as literal markup. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Flow Matching (FM) has recently emerged as a principled and efficient alternative to diffusion models. Standard FM encourages the learned velocity field to follow a target direction; however, it may accumulate errors along the trajectory and drive samples off the data manifold, leading to perceptual degradation, especially in lightweight or low-step configurations. To enhance stability and generalization, we extend FM into a balanced attract-repel scheme that provides explicit guidance on both "where to go" and "where not to go." To be formal, we propose <b>Velocity Contrastive Regularization (VeCoR)</b>, a complementary training scheme for flow-based generative modeling that augments the standard FM objective with contrastive, two-sided supervision. VeCoR not only aligns the predicted velocity with a stable reference direction (positive supervision) but also pushes it away from inconsistent, off-manifold directions (negative supervision). This contrastive formulation transforms FM from a purely attractive, one-sided objective into a two-sided training signal, regularizing trajectory evolution and improving perceptual fidelity across datasets and backbones. On ImageNet-1K 256×256, VeCoR yields 22% and 35% relative FID reductions on SiT-XL/2 and REPA-SiT-XL/2 backbones, respectively, and achieves further FID gains (32% relative) on MS-COCO text-to-image generation, demonstrating consistent improvements in stability, convergence, and image quality, particularly in low-step and lightweight settings. Project page: https://p458732.github.io/VeCoR_Project_Page/
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18929v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18929v1" target="_blank" rel="noopener noreferrer">
                以人为中心的开放未来任务发现：问题表述、基准构建与可扩展的树状搜索方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Human-Centric Open-Future Task Discovery: Formulation, Benchmark, and Scalable Tree-Based Search
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zijian Song, Xiaoxin Lin, Tao Pu, Zhenlong Yuan, Guangrun Wang, Liang Lin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注任务发现和搜索算法，虽然涉及搜索技术，但其核心是开放世界任务发现这一通用AI问题，而非具体的推荐系统、搜索或广告应用。论文的'开放未来任务发现'定位更偏向通用人工智能和任务规划领域，与当前关注的LLM技术、Transformer架构或直接应用在推荐/搜索/广告系统的技术关联度较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:33:59">2025-11-24 09:33:59</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18929v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18929v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent progress in robotics and embodied AI is largely driven by Large Multimodal Models (LMMs). However, a key challenge remains underexplored: how can we advance LMMs to discover tasks that directly assist humans in open-future scenarios, where human intentions are highly concurrent and dynamic. In this work, we formalize the problem of Human-centric Open-future Task Discovery (HOTD), focusing particularly on identifying tasks that reduce human effort across multiple plausible futures. To facilitate this study, we propose an HOTD-Bench, which features over 2K real-world videos, a semi-automated annotation pipeline, and a simulation-based protocol tailored for open-set future evaluation. Additionally, we propose the Collaborative Multi-Agent Search Tree (CMAST) framework, which decomposes the complex reasoning through a multi-agent system and structures the reasoning process through a scalable search tree module. In our experiments, CMAST achieves the best performance on the HOTD-Bench, significantly surpassing existing LMMs. It also integrates well with existing LMMs, consistently improving performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18927v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18927v1" target="_blank" rel="noopener noreferrer">
                FineXtrol：通过细粒度文本控制的可控运动生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FineXtrol: Controllable Motion Generation via Fine-Grained Text
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Keming Shen, Bizhu Wu, Junliang Chen, Xiaoqin Wang, Linlin Shen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于可控运动生成，属于计算机视觉和图形学领域，与推荐系统、搜索或广告的核心技术栈没有直接关联。虽然涉及文本控制，但运动生成的应用场景（如动画、游戏）与RecSys/Search/Ads的排序、检索或用户建模需求相去甚远，缺乏明确的转化路径。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:32:26">2025-11-24 09:32:26</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18927v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18927v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent works have sought to enhance the controllability and precision of text-driven motion generation. Some approaches leverage large language models (LLMs) to produce more detailed texts, while others incorporate global 3D coordinate sequences as additional control signals. However, the former often introduces misaligned details and lacks explicit temporal cues, and the latter incurs significant computational cost when converting coordinates to standard motion representations. To address these issues, we propose FineXtrol, a novel control framework for efficient motion generation guided by temporally-aware, precise, user-friendly, and fine-grained textual control signals that describe specific body part movements over time. In support of this framework, we design a hierarchical contrastive learning module that encourages the text encoder to produce more discriminative embeddings for our novel control signals, thereby improving motion controllability. Quantitative results show that FineXtrol achieves strong performance in controllable motion generation, while qualitative analysis demonstrates its flexibility in directing specific body part movements.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2511.18920v1. While the card carries the
         "collapsed-level-1" class, the page's inline stylesheet hides .paper-details.
         Font Awesome icons and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18920v1" target="_blank" rel="noopener noreferrer">
                EventSTU：面向视频大语言模型的事件引导高效时空理解
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang is set because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            EventSTU: Event-Guided Efficient Spatio-Temporal Understanding for Video Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wenhao Xu, Xin Dong, Yue Li, Haoyuan Shi, Zhiwei Xiong
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频大语言模型的时空理解效率问题，属于纯粹的视觉和多模态领域。虽然提到了效率优化，但其核心应用场景是视频理解而非推荐系统、搜索或广告领域。事件引导机制在推荐/搜索/广告中的潜在应用不明确，缺乏直接相关性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags.
             The datetime value has no UTC offset because the displayed text gives none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:30:02">2025-11-24 09:30:02</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18920v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18920v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Video large language models have demonstrated strong video understanding capabilities but suffer from high inference costs due to the massive number of tokens in long videos. Inspired by event-based vision, we propose an event-guided, training-free framework for efficient spatio-temporal understanding, named EventSTU. In the temporal domain, we design a coarse-to-fine keyframe sampling algorithm that exploits the change-triggered property of event cameras to eliminate redundant frames. In the spatial domain, we design an adaptive token pruning algorithm that leverages the visual saliency of events as a zero-cost prior to guide spatial reduction. From a holistic spatio-temporal perspective, we further integrate question relevance from keyframe sampling to adaptively allocate token pruning budgets. To facilitate evaluation, we construct EventBench, the first event-inclusive, human-annotated multimodal benchmark that covers diverse real-world scenarios. Beyond physical event cameras, EventSTU also supports general video understanding using simulated events. Comprehensive experiments show that EventSTU achieves 3.01x FLOPs reduction and 3.10x prefilling speedup over the strongest baseline while still improving performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-1): the inline stylesheet sets display: none on .paper-details inside this level. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18919v1" target="_blank" rel="noopener noreferrer">
                学习信任什么：贝叶斯先验引导的视觉生成优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ruiying Liu, Yuanzhi Liang, Haibin Huang, Tianshu Yu, Chi Zhang
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉生成领域的优化方法，虽然涉及贝叶斯先验和优化技术，但其核心应用场景是视觉生成而非推荐系统、搜索或广告。论文没有明确展示这些技术如何应用于异构数据处理、用户序列建模或排名任务，因此与当前关注点的相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:29:30">2025-11-24 09:29:30</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18919v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18919v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Group Relative Policy Optimization (GRPO) has emerged as an effective and lightweight framework for post-training visual generative models. However, its performance is fundamentally limited by the ambiguity of textual visual correspondence: a single prompt may validly describe diverse visual outputs, and a single image or video may support multiple equally correct interpretations. This many to many relationship leads reward models to generate uncertain and weakly discriminative signals, causing GRPO to underutilize reliable feedback and overfit noisy ones. We introduce Bayesian Prior-Guided Optimization (BPGO), a novel extension of GRPO that explicitly models reward uncertainty through a semantic prior anchor. BPGO adaptively modulates optimization trust at two levels: inter-group Bayesian trust allocation emphasizes updates from groups consistent with the prior while down-weighting ambiguous ones, and intra-group prior-anchored renormalization sharpens sample distinctions by expanding confident deviations and compressing uncertain scores. Across both image and video generation tasks, BPGO delivers consistently stronger semantic alignment, enhanced perceptual fidelity, and faster convergence than standard GRPO and recent variants.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19317v1" target="_blank" rel="noopener noreferrer">
                MultiBanAbs：一个全面的多领域孟加拉语抽象文本摘要数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MultiBanAbs: A Comprehensive Multi-Domain Bangla Abstractive Text Summarization Dataset
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Md. Tanzim Ferdous, Naeem Ahsan Chowdhury, Prithwiraj Bhattacharjee
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于孟加拉语文本摘要数据集构建，属于纯NLP领域的内容生成任务。虽然涉及多领域数据，但缺乏与推荐系统、搜索或广告的直接关联，且不涉及LLM技术、Transformer架构改进或异构数据处理等核心关注领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:11:49">2025-11-24 17:11:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19317v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19317v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This study developed a new Bangla abstractive summarization dataset to generate concise summaries of Bangla articles from diverse sources. Most existing studies in this field have concentrated on news articles, where journalists usually follow a fixed writing style. While such approaches are effective in limited contexts, they often fail to adapt to the varied nature of real-world Bangla texts. In today's digital era, a massive amount of Bangla content is continuously produced across blogs, newspapers, and social media. This creates a pressing need for summarization systems that can reduce information overload and help readers understand content more quickly. To address this challenge, we developed a dataset of over 54,000 Bangla articles and summaries collected from multiple sources, including blogs such as Cinegolpo and newspapers such as Samakal and The Business Standard. Unlike single-domain resources, our dataset spans multiple domains and writing styles. It offers greater adaptability and practical relevance. To establish strong baselines, we trained and evaluated this dataset using several deep learning and transfer learning models, including LSTM, BanglaT5-small, and MTS-small. The results highlight its potential as a benchmark for future research in Bangla natural language processing. This dataset provides a solid foundation for building robust summarization systems and helps expand NLP resources for low-resource languages.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19304v1" target="_blank" rel="noopener noreferrer">
                AutoEnv：用于测量跨环境智能体学习的自动化环境
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AutoEnv: Automated Environments for Measuring Cross-Environment Agent Learning
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiayi Zhang, Yiran Peng, Fanqi Kong, Yang Cheng, Yifan Wu, Zhaoyang Yu, Jinyu Xi...
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注自动化环境和跨环境智能体学习，这属于强化学习的通用基础设施研究。虽然强化学习在推荐系统中可能有应用，但该标题没有明确指向推荐、搜索或广告领域，也没有涉及LLM、Transformer架构或异构数据建模等核心技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:54:23">2025-11-24 16:54:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19304v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19304v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Humans naturally adapt to diverse environments by learning underlying rules across worlds with different dynamics, observations, and reward structures. In contrast, existing agents typically demonstrate improvements via self-evolving within a single domain, implicitly assuming a fixed environment distribution. Cross-environment learning has remained largely unmeasured: there is no standard collection of controllable, heterogeneous environments, nor a unified way to represent how agents learn. We address these gaps in two steps. First, we propose AutoEnv, an automated framework that treats environments as factorizable distributions over transitions, observations, and rewards, enabling low-cost (4.12 USD on average) generation of heterogeneous worlds. Using AutoEnv, we construct AutoEnv-36, a dataset of 36 environments with 358 validated levels, on which seven language models achieve 12-49% normalized reward, demonstrating the challenge of AutoEnv-36. Second, we formalize agent learning as a component-centric process driven by three stages of Selection, Optimization, and Evaluation applied to an improvable agent component. Using this formulation, we design eight learning methods and evaluate them on AutoEnv-36. Empirically, the gain of any single learning method quickly decrease as the number of environments increases, revealing that fixed learning methods do not scale across heterogeneous environments. Environment-adaptive selection of learning methods substantially improves performance but exhibits diminishing returns as the method space expands. These results highlight both the necessity and the current limitations of agent learning for scalable cross-environment generalization, and position AutoEnv and AutoEnv-36 as a testbed for studying cross-environment agent learning. The code is avaiable at https://github.com/FoundationAgents/AutoEnv.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19260v1" target="_blank" rel="noopener noreferrer">
                一种营养多模态光电容积脉搏波语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Nutrition Multimodal Photoplethysmography Language Model
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kyle Verrier, Achille Nazaret, Joseph Futoma, Andrew C. Miller, Guillermo Sapiro
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确指向医疗健康领域（营养监测和光电容积脉搏波），属于明确的医疗应用范畴，与RecSys/Search/Ads完全无关。根据用户指定的不相关主题，医疗、生物等特定领域应用应被排除。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:12:03">2025-11-24 16:12:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19260v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19260v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Hunger and satiety dynamics shape dietary behaviors and metabolic health, yet remain difficult to capture in everyday settings. We present a Nutrition Photoplethysmography Language Model (NPLM), integrating continuous photoplethysmography (PPG) from wearables with meal descriptions. NPLM projects PPG into embeddings interpretable by language models, enabling joint reasoning over physiology and meal context. Trained on 19,340 participants and 1.1 million meal-PPG pairs, the model improved daily caloric intake prediction by 11% over text-only baselines, with accuracy maintained when 80% of meal text was removed. In an independent validation study (n=140) with controlled dining and detailed meal information, the model replicated these findings. These results demonstrate the value of integrating physiological measurements from consumer wearables with meal information for noninvasive dietary monitoring at scale.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19120v1" target="_blank" rel="noopener noreferrer">
                论离散对象命名的最优性：亲属关系案例研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            On the Optimality of Discrete Object Naming: a Kinship Case Study
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Phong Le, Mees Lindeman, Raquel G. Alhama
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明研究离散对象命名问题，以亲属关系作为案例研究，这属于语言学或认知科学领域，与推荐系统、搜索、广告或LLM技术无直接关联。标题中未提及任何与Transformer架构、多模态建模或推荐/搜索应用相关的内容，因此完全不相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T13:49:31">2025-11-24 13:49:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19120v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19120v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The structure of naming systems in natural languages hinges on a trade-off between high informativeness and low complexity. Prior work capitalizes on information theory to formalize these notions; however, these studies generally rely on two simplifications: (i) optimal listeners, and (ii) universal communicative need across languages. Here, we address these limitations by introducing an information-theoretic framework for discrete object naming systems, and we use it to prove that an optimal trade-off is achievable if and only if the listener's decoder is equivalent to the Bayesian decoder of the speaker. Adopting a referential game setup from emergent communication, and focusing on the semantic domain of kinship, we show that our notion of optimality is not only theoretically achievable but also emerges empirically in learned communication systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19118v1" target="_blank" rel="noopener noreferrer">
                一种用于纳瓦特尔语单词拼写统一的符号Perl算法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A symbolic Perl algorithm for the unification of Nahuatl word spellings
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Juan-José Guzmán-Landa, Jesús Vázquez-Osorio, Juan-Manuel Torres-Moreno, Ligia Q...
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于纳瓦特尔语（一种美洲土著语言）的拼写统一算法，属于特定语言处理领域。这与推荐系统、搜索或广告的核心进展、LLM技术、Transformer架构或异构数据建模完全无关。该研究是高度专业化的语言学应用，没有任何明显的实际应用可以转移到RecSys/Search/Ads领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T13:49:13">2025-11-24 13:49:13</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19118v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19118v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this paper, we describe a symbolic model for the automatic orthographic unification of Nawatl text documents. Our model is based on algorithms that we have previously used to analyze sentences in Nawatl, and on the corpus called $π$-yalli, consisting of texts in several Nawatl orthographies. Our automatic unification algorithm implements linguistic rules in symbolic regular expressions. We also present a manual evaluation protocol that we have proposed and implemented to assess the quality of the unified sentences generated by our algorithm, by testing in a sentence semantic task. We have obtained encouraging results from the evaluators for most of the desired features of our artificially unified sentences
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19063v1" target="_blank" rel="noopener noreferrer">
                蒙太奇逻辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Logic of Montage
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hayami Takahashi, Kensuke Takahashi
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这个标题与电影剪辑和视觉叙事相关，属于艺术创作领域。它不涉及推荐系统、搜索、广告的核心进展，也不涉及LLM技术、Transformer架构或异构数据建模。该主题完全超出了当前关注的技术范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T12:55:20">2025-11-24 12:55:20</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19063v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19063v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In expressing emotions, as an expression form separate from natural language, we propose an alternative form that complements natural language, acting as a proxy or window for emotional states. First, we set up an expression form "Effect of Contradictory Structure." "Effect of Contradictory Structure" is not static but dynamic. Effect in "Effect of Contradictory Structure" is unpleasant or pleasant, and the orientation to avoid that unpleasantness is considered pseudo-expression of will. Second, "Effect of Contradictory Structure" can be overlapped with each other. This overlapping operation is called "montage." A broader "Structure" that includes related "Effect of Contradictory Structure" and "Effect of Structure" are set up. Montage produces "Effect of Structure". In montage, it is necessary to set something like "strength," so we adopted Deleuze and Deleuze/Guattari's word "intensity" and set it as an element of our model. We set up a general theoretical framework - Word Import Between Systems (Models) and justified the import of "intensity" through Austin's use of the word "force." "Effect of Structure" process is demonstrated using the example of proceeding to the next level of education.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (collapsed-level-2): the inline stylesheet sets display: none !important on this level. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18937v1" target="_blank" rel="noopener noreferrer">
                基于知识的图形化方法在临床试验中的安全信号检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Knowledge-based Graphical Method for Safety Signal Detection in Clinical Trials
        </div>

        <!-- Author list; the icon is decorative. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Francois Vandenhende, Anna Georgiou, Michalis Georgiou, Theodoros Psaras, Ellie ...
        </div>

        <!-- Personalized recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于临床试验中的安全信号检测，属于医学和生物医学领域，与推荐系统、搜索或广告完全无关。论文标题明确指向医疗应用场景，没有任何技术内容与我的关注领域相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Separators are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T09:42:58">2025-11-24 09:42:58</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18937v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18937v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present a graphical, knowledge-based method for reviewing treatment-emergent adverse events (AEs) in clinical trials. The approach enhances MedDRA by adding a hidden medical knowledge layer (Safeterm) that captures semantic relationships between terms in a 2-D map. Using this layer, AE Preferred Terms can be regrouped automatically into similarity clusters, and their association to the trial disease may be quantified. The Safeterm map is available online and connected to aggregated AE incidence tables from ClinicalTrials.gov. For signal detection, we compute treatment-specific disproportionality metrics using shrinkage incidence ratios. Cluster-level EBGM values are then derived through precision-weighted aggregation. Two visual outputs support interpretation: a semantic map showing AE incidence and an expectedness-versus-disproportionality plot for rapid signal detection. Applied to three legacy trials, the automated method clearly recovers all expected safety signals. Overall, augmenting MedDRA with a medical knowledge layer improves clarity, efficiency, and accuracy in AE interpretation for clinical trials.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18860v1" target="_blank" rel="noopener noreferrer">
                使用大型语言模型生成阅读理解练习用于教育应用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Generating Reading Comprehension Exercises with Large Language Models for Educational Applications
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xingyu Huang, Fei Jiang, Jianli Xiao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于教育领域的阅读理解练习生成，属于纯粹的LLM内容生成应用。这与我的关注点（推荐系统、搜索、广告中的核心进展和LLM技术应用）完全无关，且明确属于被排除的'纯粹LLM中心化主题'和'AIGC、内容生成'类别。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T08:00:48">2025-11-24 08:00:48</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18860v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18860v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                With the rapid development of large language models (LLMs), the applications of LLMs have grown substantially. In the education domain, LLMs demonstrate significant potential, particularly in automatic text generation, which enables the creation of intelligent and adaptive learning content. This paper proposes a new LLMs framework, which is named as Reading Comprehension Exercise Generation (RCEG). It can generate high-quality and personalized English reading comprehension exercises automatically. Firstly, RCEG uses fine-tuned LLMs to generate content candidates. Then, it uses a discriminator to select the best candidate. Finally, the quality of the generated content has been improved greatly. To evaluate the performance of RCEG, a dedicated dataset for English reading comprehension is constructed to perform the experiments, and comprehensive evaluation metrics are used to analyze the experimental results. These metrics include content diversity, factual accuracy, linguistic toxicity, and pedagogical alignment. Experimental results show that RCEG significantly improves the relevance and cognitive appropriateness of the generated exercises.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18852v1" target="_blank" rel="noopener noreferrer">
                FanarGuard：面向阿拉伯语言模型的文化感知内容审核过滤器
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FanarGuard: A Culturally-Aware Moderation Filter for Arabic Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Masoomali Fatehkia, Enes Altinisik, Husrev Taha Sencar
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于阿拉伯语内容审核和文化敏感性，属于特定语言的安全和伦理应用，与推荐系统、搜索或广告的核心技术进展无关。论文内容涉及模型安全性和文化适应性，属于被明确排除的伦理和非技术性话题范畴。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T07:48:35">2025-11-24 07:48:35</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18852v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18852v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Content moderation filters are a critical safeguard against alignment failures in language models. Yet most existing filters focus narrowly on general safety and overlook cultural context. In this work, we introduce FanarGuard, a bilingual moderation filter that evaluates both safety and cultural alignment in Arabic and English. We construct a dataset of over 468K prompt and response pairs, drawn from synthetic and public datasets, scored by a panel of LLM judges on harmlessness and cultural awareness, and use it to train two filter variants. To rigorously evaluate cultural alignment, we further develop the first benchmark targeting Arabic cultural contexts, comprising over 1k norm-sensitive prompts with LLM-generated responses annotated by human raters. Results show that FanarGuard achieves stronger agreement with human annotations than inter-annotator reliability, while matching the performance of state-of-the-art filters on safety benchmarks. These findings highlight the importance of integrating cultural awareness into moderation and establish FanarGuard as a practical step toward more context-sensitive safeguards.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18824v1" target="_blank" rel="noopener noreferrer">
                使用多模态语言模型评估婴儿视觉与语言体验的对齐度
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Assessing the alignment between infants' visual and linguistic experience using multimodal language models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Alvin Wei Ming Tan, Jane Yang, Tarun Sepuri, Khai Loong Aw, Robert Z. Sparks, Zi...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注婴儿发展的多模态学习，属于发展心理学和认知科学领域，与推荐系统、搜索或广告的核心技术无关。虽然涉及多模态模型，但研究对象是婴儿体验对齐，没有任何潜在的RecSys/Search/Ads应用场景。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T06:58:16">2025-11-24 06:58:16</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18824v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18824v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Figuring out which objects or concepts words refer to is a central language learning challenge for young children. Most models of this process posit that children learn early object labels from co-occurrences of words and their referents that occur when someone around them talks about an object in the immediate physical environment. But how aligned in time are children's visual and linguistic experiences during everyday learning? To date, answers to this question have been limited by the need for labor-intensive manual annotations of vision-language co-occurrences. Here, we evaluate the use of contrastive language-image pretraining (CLIP) models to automatically characterize vision-language alignment in egocentric videos taken from the infant perspective in home environments. After validating CLIP alignment scores using human alignment judgments, we apply this metric to a large corpus of infant-perspective videos. We show that idealized aligned moments for learning (e.g., "look at the ball" with a ball present in the child's view) are relatively rare in children's everyday experiences compared to modern machine learning datasets, and highlight variability in alignment both within and across children. These findings suggest that infrequent alignment is a constraint for models describing early word learning and offer a new method for investigating children's multimodal environment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18774v1" target="_blank" rel="noopener noreferrer">
                面向阿拉伯语语言变体下自动语音识别的上下文感知Whisper模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Context-Aware Whisper for Arabic ASR Under Linguistic Varieties
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Bashar Talafha, Amin Abu Alhassan, Muhammad Abdul-Mageed
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于阿拉伯语自动语音识别(ASR)和语音处理，属于纯语音领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然提到了上下文感知，但这是针对语音识别任务的特定应用，没有展示出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T05:16:04">2025-11-24 05:16:04</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18774v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18774v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Low-resource ASR remains a challenging problem, especially for languages like Arabic that exhibit wide dialectal variation and limited labeled data. We propose context-aware prompting strategies to adapt OpenAI's Whisper for Arabic speech recognition without retraining. Our methods include decoder prompting with first-pass transcriptions or retrieved utterances, and encoder prefixing using speech synthesized in the target speaker's voice. We introduce techniques such as prompt reordering, speaker-aware prefix synthesis, and modality-specific retrieval (lexical, semantic, acoustic) to improve transcription in real-world, zero-shot settings. Evaluated on nine Arabic linguistic conditions, our approach reduces WER by up to 22.3% on Modern Standard Arabic and 9.2% on dialectal speech, significantly mitigating hallucinations and speaker mismatch.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19437v1" target="_blank" rel="noopener noreferrer">
                LumiTex：面向具有光照上下文的高保真PBR纹理生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            LumiTex: Towards High-Fidelity PBR Texture Generation with Illumination Context
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingzhi Bao, Hongze Chen, Lingting Zhu, Chenyu Liu, Runze Zhang, Keyang Luo, Zey...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机图形学中的PBR纹理生成，属于纯粹的视觉内容生成领域。虽然标题提到纹理生成，但这与推荐系统、搜索或广告中的核心排名、用户建模或特征工程没有直接关联，也不涉及LLM或Transformer架构在推荐领域的应用。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:59:58">2025-11-24 18:59:58</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19437v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19437v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Physically-based rendering (PBR) provides a principled standard for realistic material-lighting interactions in computer graphics. Despite recent advances in generating PBR textures, existing methods fail to address two fundamental challenges: 1) materials decomposition from image prompts under limited illumination cues, and 2) seamless and view-consistent texture completion. To this end, we propose LumiTex, an end-to-end framework that comprises three key components: (1) a multi-branch generation scheme that disentangles albedo and metallic-roughness under shared illumination priors for robust material understanding, (2) a lighting-aware material attention mechanism that injects illumination context into the decoding process for physically grounded generation of albedo, metallic, and roughness maps, and (3) a geometry-guided inpainting module based on a large view synthesis model that enriches texture coverage and ensures seamless, view-consistent UV completion. Extensive experiments demonstrate that LumiTex achieves state-of-the-art performance in texture quality, surpassing both existing open-source and commercial methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19431v1" target="_blank" rel="noopener noreferrer">
                四维云
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Cloud4D
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jacob Lin, Edward Gryspeerdt, Ronald Clark
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">标题'Cloud4D'过于模糊且缺乏技术细节，无法判断其具体技术内容或应用领域。根据现有信息，该标题没有显示出与推荐系统、搜索、广告、LLM技术或Transformer架构的任何明确关联。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:59:37">2025-11-24 18:59:37</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19431v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19431v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">physics.ao-ph</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <!-- NOTE(review): the abstract contained a raw "<" (LaTeX "$<10\%$"); it is escaped as &lt; here. The generator should HTML-escape abstract text. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                There has been great progress in improving numerical weather prediction and climate models using machine learning. However, most global models act at a kilometer-scale, making it challenging to model individual clouds and factors such as extreme precipitation, wind gusts, turbulence, and surface irradiance. Therefore, there is a need to move towards higher-resolution models, which in turn require high-resolution real-world observations that current instruments struggle to obtain. We present Cloud4D, the first learning-based framework that reconstructs a physically consistent, four-dimensional cloud state using only synchronized ground-based cameras. Leveraging a homography-guided 2D-to-3D transformer, Cloud4D infers the full 3D distribution of liquid water content at 25 m spatial and 5 s temporal resolution. By tracking the 3D liquid water content retrievals over time, Cloud4D additionally estimates horizontal wind vectors. Across a two-month deployment comprising six skyward cameras, our system delivers an order-of-magnitude improvement in space-time resolution relative to state-of-the-art satellite measurements, while retaining single-digit relative error ($&lt;10\%$) against collocated radar measurements. Code and data are available on our project page https://cloud4d.jacob-lin.com/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19430v1" target="_blank" rel="noopener noreferrer">
                共同烹饪与清洁：教导具身智能体进行并行任务执行
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Cook and Clean Together: Teaching Embodied Agents for Parallel Task Execution
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dingkang Liang, Cheng Zhang, Xiaopeng Xu, Jianzhong Ju, Zhenbo Luo, Xiang Bai
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于具身智能体的并行任务执行，属于机器人学和具身AI领域。这与推荐系统、搜索或广告的核心技术栈没有直接关联，也不涉及LLM技术、Transformer架构进展或异构数据建模。论文内容更偏向于物理世界交互和机器人控制，而非信息检索或个性化推荐相关的技术方向。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:59:17">2025-11-24 18:59:17</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19430v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19430v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Task scheduling is critical for embodied AI, enabling agents to follow natural language instructions and execute actions efficiently in 3D physical worlds. However, existing datasets often simplify task planning by ignoring operations research (OR) knowledge and 3D spatial grounding. In this work, we propose Operations Research knowledge-based 3D Grounded Task Scheduling (ORS3D), a new task that requires the synergy of language understanding, 3D grounding, and efficiency optimization. Unlike prior settings, ORS3D demands that agents minimize total completion time by leveraging parallelizable subtasks, e.g., cleaning the sink while the microwave operates. To facilitate research on ORS3D, we construct ORS3D-60K, a large-scale dataset comprising 60K composite tasks across 4K real-world scenes. Furthermore, we propose GRANT, an embodied multi-modal large language model equipped with a simple yet effective scheduling token mechanism to generate efficient task schedules and grounded actions. Extensive experiments on ORS3D-60K validate the effectiveness of GRANT across language understanding, 3D grounding, and scheduling efficiency. The code is available at https://github.com/H-EmbodVis/GRANT
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card carries .collapsed-level-2, which the page's inline CSS sets to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19428v1" target="_blank" rel="noopener noreferrer">
                无需数据的流图蒸馏
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: English title, authors, recommendation rationale, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Flow Map Distillation Without Data
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shangyuan Tong, Nanye Ma, Saining Xie, Tommi Jaakkola
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明涉及流图蒸馏技术，这属于计算机视觉或图形处理领域，与推荐系统、搜索或广告的核心技术栈没有直接关联。标题中的'无需数据'可能指数据生成或合成方法，但缺乏与推荐/搜索/广告领域的明确连接点，无法识别出在Transformer架构、LLM技术或异构数据处理方面的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone given in the source data), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T18:58:55">2025-11-24 18:58:55</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19428v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19428v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract; the chevron is decorative. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                State-of-the-art flow models achieve remarkable quality but require slow, iterative sampling. To accelerate this, flow maps can be distilled from pre-trained teachers, a procedure that conventionally requires sampling from an external dataset. We argue that this data-dependency introduces a fundamental risk of Teacher-Data Mismatch, as a static dataset may provide an incomplete or even misaligned representation of the teacher's full generative capabilities. This leads us to question whether this reliance on data is truly necessary for successful flow map distillation. In this work, we explore a data-free alternative that samples only from the prior distribution, a distribution the teacher is guaranteed to follow by construction, thereby circumventing the mismatch risk entirely. To demonstrate the practical viability of this philosophy, we introduce a principled framework that learns to predict the teacher's sampling path while actively correcting for its own compounding errors to ensure high fidelity. Our approach surpasses all data-based counterparts and establishes a new state-of-the-art by a significant margin. Specifically, distilling from SiT-XL/2+REPA, our method reaches an impressive FID of 1.45 on ImageNet 256x256, and 1.49 on ImageNet 512x512, both with only 1 sampling step. We hope our work establishes a more robust paradigm for accelerating generative models and motivates the broader adoption of flow map distillation without data.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19426v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19426v1" target="_blank" rel="noopener noreferrer">
                Ref-SAM3D：通过文本连接SAM3D实现参考三维重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Ref-SAM3D: Bridging SAM3D with Text for Reference 3D Reconstruction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yun Zhou, Yaoting Wang, Guangquan Jie, Jinyu Liu, Henghui Ding
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D视觉重建技术，涉及3D分割和文本引导的3D生成。虽然提到了文本模态，但其核心应用领域是计算机视觉和3D重建，与推荐系统、搜索或广告的排名和建模需求没有直接关联。该技术缺乏在RecSys/Search/Ads领域的明确应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 18:58:22">2025-11-24 18:58:22</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19426v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19426v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    SAM3D has garnered widespread attention for its strong 3D object reconstruction capabilities. However, a key limitation remains: SAM3D cannot reconstruct specific objects referred to by textual descriptions, a capability that is essential for practical applications such as 3D editing, game development, and virtual environments. To address this gap, we introduce Ref-SAM3D, a simple yet effective extension to SAM3D that incorporates textual descriptions as a high-level prior, enabling text-guided 3D reconstruction from a single RGB image. Through extensive qualitative experiments, we show that Ref-SAM3D, guided only by natural language and a single 2D view, delivers competitive and high-fidelity zero-shot reconstruction performance. Our results demonstrate that Ref-SAM3D effectively bridges the gap between 2D visual cues and 3D geometric understanding, offering a more flexible and accessible paradigm for reference-guided 3D reconstruction. Code is available at: https://github.com/FudanCVL/Ref-SAM3D.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19425v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19425v1" target="_blank" rel="noopener noreferrer">
                SAM3-Adapter：高效适配Segment Anything 3用于伪装目标分割、阴影检测和医学图像分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SAM3-Adapter: Efficient Adaptation of Segment Anything 3 for Camouflage Object Segmentation, Shadow Detection, and Medical Image Segmentation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tianrun Chen, Runlong Cao, Xinda Yu, Lanyun Zhu, Chaotao Ding, Deyi Ji, Cheng Ch...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的特定分割任务（伪装目标分割、阴影检测、医学图像分割），属于纯粹的视觉应用。虽然涉及模型适配技术，但没有任何与推荐系统、搜索或广告相关的潜在应用场景，完全超出了当前关注的技术领域范围。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 18:57:54">2025-11-24 18:57:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19425v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19425v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    The rapid rise of large-scale foundation models has reshaped the landscape of image segmentation, with models such as Segment Anything achieving unprecedented versatility across diverse vision tasks. However, previous generations-including SAM and its successor-still struggle with fine-grained, low-level segmentation challenges such as camouflaged object detection, medical image segmentation, cell image segmentation, and shadow detection. To address these limitations, we originally proposed SAM-Adapter in 2023, demonstrating substantial gains on these difficult scenarios. With the emergence of Segment Anything 3 (SAM3)-a more efficient and higher-performing evolution with a redesigned architecture and improved training pipeline-we revisit these long-standing challenges. In this work, we present SAM3-Adapter, the first adapter framework tailored for SAM3 that unlocks its full segmentation capability. SAM3-Adapter not only reduces computational overhead but also consistently surpasses both SAM and SAM2-based solutions, establishing new state-of-the-art results across multiple downstream tasks, including medical imaging, camouflaged (concealed) object segmentation, and shadow detection. Built upon the modular and composable design philosophy of the original SAM-Adapter, SAM3-Adapter provides stronger generalizability, richer task adaptability, and significantly improved segmentation precision. Extensive experiments confirm that integrating SAM3 with our adapter yields superior accuracy, robustness, and efficiency compared to all prior SAM-based adaptations. We hope SAM3-Adapter can serve as a foundation for future research and practical segmentation applications. Code, pre-trained models, and data processing pipelines are available.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19396v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19396v1" target="_blank" rel="noopener noreferrer">
                动态声学环境中基于端侧深度学习的自适应波束成形实时目标跟踪
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Real-Time Object Tracking with On-Device Deep Learning for Adaptive Beamforming in Dynamic Acoustic Environments
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jorge Ortigoso-Narro, Jose A. Belloch, Adrian Amor-Martin, Sandra Roger, Maximo ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于声学环境中的目标跟踪和波束成形技术，属于信号处理和计算机视觉领域。虽然涉及深度学习，但其应用场景（声学环境、波束成形）与推荐系统、搜索或广告的核心技术栈没有直接关联，也没有明显的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 18:33:50">2025-11-24 18:33:50</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19396v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19396v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.SD</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    Advances in object tracking and acoustic beamforming are driving new capabilities in surveillance, human-computer interaction, and robotics. This work presents an embedded system that integrates deep learning-based tracking with beamforming to achieve precise sound source localization and directional audio capture in dynamic environments. The approach combines single-camera depth estimation and stereo vision to enable accurate 3D localization of moving objects. A planar concentric circular microphone array constructed with MEMS microphones provides a compact, energy-efficient platform supporting 2D beam steering across azimuth and elevation. Real-time tracking outputs continuously adapt the array's focus, synchronizing the acoustic response with the target's position. By uniting learned spatial awareness with dynamic steering, the system maintains robust performance in the presence of multiple or moving sources. Experimental evaluation demonstrates significant gains in signal-to-interference ratio, making the design well-suited for teleconferencing, smart home devices, and assistive technologies.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19394v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19394v1" target="_blank" rel="noopener noreferrer">
                BackSplit：生物医学病灶分割中背景细分化的重要性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Rachit Saluja, Asli Cihangir, Ruining Deng, Johannes C. Paetzold, Fengbei Liu, M...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于生物医学病灶分割这一特定医学领域，与推荐系统、搜索或广告的核心技术完全无关。论文讨论的背景细分化技术是纯粹的医学图像处理应用，没有任何潜在的应用场景可以迁移到推荐系统、搜索或广告领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 18:31:51">2025-11-24 18:31:51</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19394v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19394v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    Segmenting small lesions in medical images remains notoriously difficult. Most prior work tackles this challenge by either designing better architectures, loss functions, or data augmentation schemes; and collecting more labeled data. We take a different view, arguing that part of the problem lies in how the background is modeled. Common lesion segmentation collapses all non-lesion pixels into a single "background" class, ignoring the rich anatomical context in which lesions appear. In reality, the background is highly heterogeneous-composed of tissues, organs, and other structures that can now be labeled manually or inferred automatically using existing segmentation models. In this paper, we argue that training with fine-grained labels that sub-divide the background class, which we call BackSplit, is a simple yet powerful paradigm that can offer a significant performance boost without increasing inference costs. From an information theoretic standpoint, we prove that BackSplit increases the expected Fisher Information relative to conventional binary training, leading to tighter asymptotic bounds and more stable optimization. With extensive experiments across multiple datasets and architectures, we empirically show that BackSplit consistently boosts small-lesion segmentation performance, even when auxiliary labels are generated automatically using pretrained segmentation models. Additionally, we demonstrate that auxiliary labels derived from interactive segmentation frameworks exhibit the same beneficial effect, demonstrating its robustness, simplicity, and broad applicability.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19367v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19367v1" target="_blank" rel="noopener noreferrer">
                面向肺癌肿瘤分期分类的解剖结构感知混合深度学习框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            An Anatomy Aware Hybrid Deep Learning Framework for Lung Cancer Tumor Stage Classification
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Saniah Kayenat Chowdhury, Rusab Sarmun, Muhammad E. H. Chowdhury, Sohaib Bassam ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的肺癌肿瘤分期分类，属于明确的医学应用范畴。根据用户设定的无关主题标准，医学、生物学等特定领域应用应被排除在外，且该工作与推荐系统、搜索或广告领域没有任何直接或间接的关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 18:01:47">2025-11-24 18:01:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19367v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19367v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    Accurate lung cancer tumor staging is crucial for prognosis and treatment planning. However, it remains challenging for end-to-end deep learning approaches, as such approaches often overlook spatial and anatomical information that are central to the tumor-node-metastasis system. The tumor stage depends on multiple quantitative criteria, including the tumor size and its proximity to the nearest anatomical structures, and small variations can alter the staging outcome. We propose a medically grounded hybrid pipeline that performs staging by explicitly measuring the tumor's size and distance properties rather than treating it as a pure image classification task. Our method employs specialized encoder-decoder networks to precisely segment the lung and adjacent anatomy, including the lobes, tumor, mediastinum, and diaphragm. Subsequently, we extract the necessary tumor properties, i.e. measure the largest tumor dimension and calculate the distance between the tumor and neighboring anatomical structures by a quantitative analysis of the segmentation masks. Finally, we apply rule-based tumor staging aligned with the medical guidelines. This novel framework has been evaluated on the Lung-PET-CT-Dx dataset, demonstrating superior performance compared to traditional deep learning models, achieving an overall classification accuracy of 91.36%. We report the per-stage F1-scores of 0.93 (T1), 0.89 (T2), 0.96 (T3), and 0.90 (T4), a critical evaluation aspect often omitted in prior literature. To our knowledge, this is the first study that embeds explicit clinical context into tumor stage classification. Unlike standard convolutional neural networks that operate in an uninterpretable "black box" manner, our method offers both state-of-the-art performance and transparent decision support.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19365v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19365v1" target="_blank" rel="noopener noreferrer">
                DeCo：用于端到端图像生成的频率解耦像素扩散
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DeCo: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zehong Ma, Longhui Wei, Shuai Wang, Shiliang Zhang, Qi Tian
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于纯图像生成技术，属于计算机视觉领域的扩散模型改进。虽然标题提到端到端图像生成，但这属于纯粹的视觉内容生成范畴，与推荐系统、搜索或广告中的排名、检索、用户建模等核心任务没有直接关联。该技术没有明显的潜在应用场景可以转化为推荐、搜索或广告领域的实际解决方案。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 17:59:06">2025-11-24 17:59:06</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19365v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19365v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    Pixel diffusion aims to generate images directly in pixel space in an end-to-end fashion. This approach avoids the limitations of VAE in the two-stage latent diffusion, offering higher model capacity. Existing pixel diffusion models suffer from slow training and inference, as they usually model both high-frequency signals and low-frequency semantics within a single diffusion transformer (DiT). To pursue a more efficient pixel diffusion paradigm, we propose the frequency-DeCoupled pixel diffusion framework. With the intuition to decouple the generation of high and low frequency components, we leverage a lightweight pixel decoder to generate high-frequency details conditioned on semantic guidance from the DiT. This thus frees the DiT to specialize in modeling low-frequency semantics. In addition, we introduce a frequency-aware flow-matching loss that emphasizes visually salient frequencies while suppressing insignificant ones. Extensive experiments show that DeCo achieves superior performance among pixel diffusion models, attaining FID of 1.62 (256x256) and 2.22 (512x512) on ImageNet, closing the gap with latent diffusion methods. Furthermore, our pretrained text-to-image model achieves a leading overall score of 0.86 on GenEval in system-level comparison. Codes are publicly available at https://github.com/Zehong-Ma/DeCo.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19356v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19356v1" target="_blank" rel="noopener noreferrer">
                与生成器共同成长：用于视频生成的自适应步调GRPO
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Growing with the Generator: Self-paced GRPO for Video Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Rui Li, Yuanzhi Liang, Ziqi Ni, Haibing Huang, Chi Zhang, Xuelong Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于视频生成技术，属于纯粹的视觉内容生成领域。虽然提到了GRPO（可能指某种强化学习优化方法），但论文明确针对视频生成应用，这与搜索、推荐或广告中的排序和匹配任务没有直接关联。视频生成属于AIGC范畴，属于明确排除的无关主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 17:56:03">2025-11-24 17:56:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19356v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19356v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    Group Relative Policy Optimization (GRPO) has emerged as a powerful reinforcement learning paradigm for post-training video generation models. However, existing GRPO pipelines rely on static, fixed-capacity reward models whose evaluation behavior is frozen during training. Such rigid rewards introduce distributional bias, saturate quickly as the generator improves, and ultimately limit the stability and effectiveness of reinforcement-based alignment. We propose Self-Paced GRPO, a competence-aware GRPO framework in which reward feedback co-evolves with the generator. Our method introduces a progressive reward mechanism that automatically shifts its emphasis from coarse visual fidelity to temporal coherence and fine-grained text-video semantic alignment as generation quality increases. This self-paced curriculum alleviates reward-policy mismatch, mitigates reward exploitation, and yields more stable optimization. Experiments on VBench across multiple video generation backbones demonstrate consistent improvements in both visual quality and semantic alignment over GRPO baselines with static rewards, validating the effectiveness and generality of Self-Paced GRPO.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup template for one regular paper card (title, score badge, details, abstract).
-->
<!-- Paper card for arXiv:2511.19351v1. Hidden by default: the page's inline CSS sets .collapsed-level-2 to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19351v1" target="_blank" rel="noopener noreferrer">
                CellFMCount：用于细胞计数的荧光显微镜数据集、基准与方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title: the document is zh-CN, so the language switch is marked. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CellFMCount: A Fluorescence Microscopy Dataset, Benchmark, and Methods for Cell Counting
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Abdurahman Ali Mohammed, Catherine Fonder, Ying Wei, Wallapak Tavanapong, Donald...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学/生物学领域的细胞计数应用，涉及荧光显微镜数据集和基准方法。这与我的关注领域（推荐系统、搜索、广告）完全无关，属于明确的无关主题中的医学/生物学应用范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No timezone is given in the source data, so datetime is a local date-time string. -->
            <time datetime="2025-11-24 17:53:59">2025-11-24 17:53:59</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19351v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19351v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

            <!-- Native disclosure widget for the full English abstract. -->
            <!-- NOTE(review): the source abstract wrapped numbers in TeX markup (dollar signs, braced commas). They are written as plain text here because no math renderer is visible in the part of the page head reviewed; confirm none is loaded later. -->
            <details class="border-t border-gray-200 pt-4 mt-4">
                <summary class="text-sm text-primary cursor-pointer">
                    查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                </summary>
                <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                    Accurate cell counting is essential in various biomedical research and clinical applications, including cancer diagnosis, stem cell research, and immunology. Manual counting is labor-intensive and error-prone, motivating automation through deep learning techniques. However, training reliable deep learning models requires large amounts of high-quality annotated data, which is difficult and time-consuming to produce manually. Consequently, existing cell-counting datasets are often limited, frequently containing fewer than 500 images. In this work, we introduce a large-scale annotated dataset comprising 3,023 images from immunocytochemistry experiments related to cellular differentiation, containing over 430,000 manually annotated cell locations. The dataset presents significant challenges: high cell density, overlapping and morphologically diverse cells, a long-tailed distribution of cell count per image, and variation in staining protocols. We benchmark three categories of existing methods: regression-based, crowd-counting, and cell-counting techniques on a test set with cell counts ranging from 10 to 2,126 cells per image. We also evaluate how the Segment Anything Model (SAM) can be adapted for microscopy cell counting using only dot-annotated datasets. As a case study, we implement a density-map-based adaptation of SAM (SAM-Counter) and report a mean absolute error (MAE) of 22.12, which outperforms existing approaches (second-best MAE of 27.46). Our results underscore the value of the dataset and the benchmarking framework for driving progress in automated cell counting and provide a robust foundation for future research and development.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19326v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19326v1" target="_blank" rel="noopener noreferrer">
                MonoMSK：单目3D肌肉骨骼动力学估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MonoMSK: Monocular 3D Musculoskeletal Dynamics Estimation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Farnoosh Koleini, Hongfei Xue, Ahmed Helmy, Pu Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的肌肉骨骼动力学估计，使用单目视觉技术。这与我的关注点完全无关，因为我的关注领域仅限于推荐系统、搜索和广告中的技术进展，特别是与LLM和Transformer架构相关的技术。该论文属于医学/生物力学领域，属于明确排除的无关主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:20:17">2025-11-24 17:20:17</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19326v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19326v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Reconstructing biomechanically realistic 3D human motion - recovering both kinematics (motion) and kinetics (forces) - is a critical challenge. While marker-based systems are lab-bound and slow, popular monocular methods use oversimplified, anatomically inaccurate models (e.g., SMPL) and ignore physics, fundamentally limiting their biomechanical fidelity. In this work, we introduce MonoMSK, a hybrid framework that bridges data-driven learning and physics-based simulation for biomechanically realistic 3D human motion estimation from monocular video. MonoMSK jointly recovers both kinematics (motions) and kinetics (forces and torques) through an anatomically accurate musculoskeletal model. By integrating transformer-based inverse dynamics with differentiable forward kinematics and dynamics layers governed by ODE-based simulation, MonoMSK establishes a physics-regulated inverse-forward loop that enforces biomechanical causality and physical plausibility. A novel forward-inverse consistency loss further aligns motion reconstruction with the underlying kinetic reasoning. Experiments on BML-MoVi, BEDLAM, and OpenCap show that MonoMSK significantly outperforms state-of-the-art methods in kinematic accuracy, while for the first time enabling precise monocular kinetics estimation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19320v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19320v1" target="_blank" rel="noopener noreferrer">
                SteadyDancer：具有首帧保持功能的协调一致的人体图像动画
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SteadyDancer: Harmonized and Coherent Human Image Animation with First-Frame Preservation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiaming Zhang, Shengming Cao, Rui Li, Xiaotong Zhao, Yutao Cui, Xinglin Hou, Gan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于人体图像动画生成技术，属于纯粹的计算机视觉领域。虽然标题提到'首帧保持'和'协调一致'等技术概念，但这些与推荐系统、搜索或广告的核心技术需求没有直接关联。该技术主要应用于内容生成和视觉特效，属于明确排除的'纯粹视觉'和'AIGC/内容生成'类别。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:15:55">2025-11-24 17:15:55</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19320v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19320v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Preserving first-frame identity while ensuring precise motion control is a fundamental challenge in human image animation. The Image-to-Motion Binding process of the dominant Reference-to-Video (R2V) paradigm overlooks critical spatio-temporal misalignments common in real-world applications, leading to failures such as identity drift and visual artifacts. We introduce SteadyDancer, an Image-to-Video (I2V) paradigm-based framework that achieves harmonized and coherent animation and is the first to ensure first-frame preservation robustly. Firstly, we propose a Condition-Reconciliation Mechanism to harmonize the two conflicting conditions, enabling precise control without sacrificing fidelity. Secondly, we design Synergistic Pose Modulation Modules to generate an adaptive and coherent pose representation that is highly compatible with the reference image. Finally, we employ a Staged Decoupled-Objective Training Pipeline that hierarchically optimizes the model for motion fidelity, visual quality, and temporal coherence. Experiments demonstrate that SteadyDancer achieves state-of-the-art performance in both appearance fidelity and motion control, while requiring significantly fewer training resources than comparable methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19319v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19319v1" target="_blank" rel="noopener noreferrer">
                SyncMV4D：用于手-物体交互合成的外观与运动同步多视图联合扩散
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SyncMV4D: Synchronized Multi-view Joint Diffusion of Appearance and Motion for Hand-Object Interaction Synthesis
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Lingwei Dang, Zonghan Li, Juntong Li, Hongwen Zhang, Liang An, Yebin Liu, Qingya...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于手-物体交互合成的计算机视觉任务，涉及多视图扩散模型和4D运动合成。虽然技术上有一定复杂性，但属于纯粹的视觉和图形学领域，与推荐系统、搜索或广告的核心技术栈没有明显关联。论文内容主要针对交互动作生成，无法识别出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:14:19">2025-11-24 17:14:19</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19319v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19319v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Hand-Object Interaction (HOI) generation plays a critical role in advancing applications across animation and robotics. Current video-based methods are predominantly single-view, which impedes comprehensive 3D geometry perception and often results in geometric distortions or unrealistic motion patterns. While 3D HOI approaches can generate dynamically plausible motions, their dependence on high-quality 3D data captured in controlled laboratory settings severely limits their generalization to real-world scenarios. To overcome these limitations, we introduce SyncMV4D, the first model that jointly generates synchronized multi-view HOI videos and 4D motions by unifying visual prior, motion dynamics, and multi-view geometry. Our framework features two core innovations: (1) a Multi-view Joint Diffusion (MJD) model that co-generates HOI videos and intermediate motions, and (2) a Diffusion Points Aligner (DPA) that refines the coarse intermediate motion into globally aligned 4D metric point tracks. To tightly couple 2D appearance with 4D dynamics, we establish a closed-loop, mutually enhancing cycle. During the diffusion denoising process, the generated video conditions the refinement of the 4D motion, while the aligned 4D point tracks are reprojected to guide next-step joint generation. Experimentally, our method demonstrates superior performance to state-of-the-art alternatives in visual realism, motion plausibility, and multi-view consistency.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19316v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19316v1" target="_blank" rel="noopener noreferrer">
                评估数据集水印在定制化扩散模型微调溯源中的应用：综合基准与移除方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Evaluating Dataset Watermarking for Fine-tuning Traceability of Customized Diffusion Models: A Comprehensive Benchmark and Removal Approach
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xincheng Wang, Hanchi Sun, Wenjun Sun, Kejun Xue, Wangqiu Zhou, Jianbo Zhang, We...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注数据集水印技术和扩散模型的溯源问题，这属于安全、隐私和版权保护范畴，与您明确排除的指纹、安全、隐私等非技术主题直接相关。论文内容涉及AIGC和内容生成领域，但缺乏与推荐系统、搜索或广告排名的直接技术关联，因此不符合您的核心关注点。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T17:11:00">2025-11-24 17:11:00</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19316v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19316v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent fine-tuning techniques for diffusion models enable them to reproduce specific image sets, such as particular faces or artistic styles, but also introduce copyright and security risks. Dataset watermarking has been proposed to ensure traceability by embedding imperceptible watermarks into training images, which remain detectable in outputs even after fine-tuning. However, current methods lack a unified evaluation framework. To address this, this paper establishes a general threat model and introduces a comprehensive evaluation framework encompassing Universality, Transmissibility, and Robustness. Experiments show that existing methods perform well in universality and transmissibility, and exhibit some robustness against common image processing operations, yet still fall short under real-world threat scenarios. To reveal these vulnerabilities, the paper further proposes a practical watermark removal method that fully eliminates dataset watermarks without affecting fine-tuning, highlighting a key challenge for future research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19301v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19301v1" target="_blank" rel="noopener noreferrer">
                IDEAL-M3D：用于单目3D检测的实例多样性增强主动学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            IDEAL-M3D: Instance Diversity-Enriched Active Learning for Monocular 3D Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Johannes Meier, Florian Günther, Riccardo Marin, Oussema Dhaouadi, Jacques Kaise...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的单目3D检测，属于纯粹的视觉任务，与推荐系统、搜索或广告没有直接关联。主动学习方法虽然通用，但论文的应用场景和核心贡献都局限于视觉领域，没有展示出在RecSys/Search/Ads中的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:49:20">2025-11-24 16:49:20</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19301v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19301v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Monocular 3D detection relies on just a single camera and is therefore easy to deploy. Yet, achieving reliable 3D understanding from monocular images requires substantial annotation, and 3D labels are especially costly. To maximize performance under constrained labeling budgets, it is essential to prioritize annotating samples expected to deliver the largest performance gains. This prioritization is the focus of active learning. Curiously, we observed two significant limitations in active learning algorithms for 3D monocular object detection. First, previous approaches select entire images, which is inefficient, as non-informative instances contained in the same image also need to be labeled. Secondly, existing methods rely on uncertainty-based selection, which in monocular 3D object detection creates a bias toward depth ambiguity. Consequently, distant objects are selected, while nearby objects are overlooked. To address these limitations, we propose IDEAL-M3D, the first instance-level pipeline for monocular 3D detection. For the first time, we demonstrate that an explicitly diverse, fast-to-train ensemble improves diversity-driven active learning for monocular 3D. We induce diversity with heterogeneous backbones and task-agnostic features, loss weight perturbation, and time-dependent bagging. IDEAL-M3D shows superior performance and significant resource savings: with just 60% of the annotations, we achieve similar or better AP3D on KITTI validation and test set results compared to training the same detector on the whole dataset.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19294v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19294v1" target="_blank" rel="noopener noreferrer">
                预先稠密化：激光雷达辅助的内容感知稠密化，用于高效且高质量的3D高斯泼溅
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DensifyBeforehand: LiDAR-assisted Content-aware Densification for Efficient and Quality 3D Gaussian Splatting
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Phurtivilai Patt, Leyang Huang, Yinqiang Zhang, Yang Lei
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于3D计算机视觉中的渲染技术，具体涉及3D高斯泼溅和激光雷达数据处理。虽然提到了效率优化，但这是纯粹的3D视觉应用，与推荐系统、搜索或广告没有任何直接或间接的关联。该技术没有明显的潜力应用于RecSys/Search/Ads领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:39:13">2025-11-24 16:39:13</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19294v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19294v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper addresses the limitations of existing 3D Gaussian Splatting (3DGS) methods, particularly their reliance on adaptive density control, which can lead to floating artifacts and inefficient resource usage. We propose a novel densify beforehand approach that enhances the initialization of 3D scenes by combining sparse LiDAR data with monocular depth estimation from corresponding RGB images. Our ROI-aware sampling scheme prioritizes semantically and geometrically important regions, yielding a dense point cloud that improves visual fidelity and computational efficiency. This densify beforehand approach bypasses the adaptive density control that may introduce redundant Gaussians in the original pipeline, allowing the optimization to focus on the other attributes of 3D Gaussian primitives, reducing overlap while enhancing visual quality. Our method achieves comparable results to state-of-the-art techniques while significantly lowering resource consumption and training time. We validate our approach through extensive comparisons and ablation studies on four newly collected datasets, showcasing its effectiveness in preserving regions of interest in complex scenes.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19268v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19268v1" target="_blank" rel="noopener noreferrer">
                BideDPO：同时实现文本与条件对齐的条件图像生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            BideDPO: Conditional Image Generation with Simultaneous Text and Condition Alignment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dewei Zhou, Mingwei Li, Zongxin Yang, Yu Lu, Yunqiu Xu, Zhizhong Wang, Zeyi Huan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于条件图像生成技术，属于纯粹的视觉内容生成领域。虽然提到了文本与条件对齐的概念，但这与推荐系统、搜索或广告中的排名和匹配任务没有直接关联。论文的核心技术方向属于AIGC和内容生成范畴，属于明确排除的无关主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:20:11">2025-11-24 16:20:11</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19268v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19268v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Conditional image generation enhances text-to-image synthesis with structural, spatial, or stylistic priors, but current methods face challenges in handling conflicts between sources. These include 1) input-level conflicts, where the conditioning image contradicts the text prompt, and 2) model-bias conflicts, where generative biases disrupt alignment even when conditions match the text. Addressing these conflicts requires nuanced solutions, which standard supervised fine-tuning struggles to provide. Preference-based optimization techniques like Direct Preference Optimization (DPO) show promise but are limited by gradient entanglement between text and condition signals and lack disentangled training data for multi-constraint tasks. To overcome this, we propose a bidirectionally decoupled DPO framework (BideDPO). Our method creates two disentangled preference pairs-one for the condition and one for the text-to reduce gradient entanglement. The influence of pairs is managed using an Adaptive Loss Balancing strategy for balanced optimization. We introduce an automated data pipeline to sample model outputs and generate conflict-aware data. This process is embedded in an iterative optimization strategy that refines both the model and the data. We construct a DualAlign benchmark to evaluate conflict resolution between text and condition. Experiments show BideDPO significantly improves text success rates (e.g., +35%) and condition adherence. We also validate our approach using the COCO dataset. Project Pages: https://limuloo.github.io/BideDPO/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19254v1: a header (translated title link + score badge) followed by the paper details. The collapsed-level-2 class hides the card through the display: none rule in the page's inline stylesheet. Font Awesome icons are decorative and marked aria-hidden; English passages carry lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19254v1" target="_blank" rel="noopener noreferrer">
                基于可微分3D模拟的视觉货物占用率估计对抗性补丁攻击
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Adversarial Patch Attacks on Vision-Based Cargo Occupancy Estimation via Differentiable 3D Simulation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mohamed Rissal Hedna, Sesugh Samuel Nder
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究计算机视觉领域的对抗性攻击和安全漏洞，属于纯粹的视觉安全研究。虽然涉及货物占用率估计这一应用场景，但论文焦点是攻击方法和安全漏洞，而非推荐系统、搜索或广告的核心技术。该主题明确属于不相关的安全/隐私范畴，与用户当前关注的核心领域进展、LLM技术或Transformer架构无关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The page shows no timezone, so datetime is a zone-less local value. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T16:05:40">2025-11-24 16:05:40</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19254v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19254v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Computer vision systems are increasingly adopted in modern logistics operations, including the estimation of trailer occupancy for planning, routing, and billing. Although effective, such systems may be vulnerable to physical adversarial attacks, particularly adversarial patches that can be printed and placed on interior surfaces. In this work, we study the feasibility of such attacks on a convolutional cargo-occupancy classifier using fully simulated 3D environments. Using Mitsuba 3 for differentiable rendering, we optimize patch textures across variations in geometry, lighting, and viewpoint, and compare their effectiveness to a 2D compositing baseline. Our experiments demonstrate that 3D-optimized patches achieve high attack success rates, especially in a denial-of-service scenario (empty to full), where success reaches 84.94 percent. Concealment attacks (full to empty) prove more challenging but still reach 30.32 percent. We analyze the factors influencing attack success, discuss implications for the security of automated logistics pipelines, and highlight directions for strengthening physical robustness. To our knowledge, this is the first study to investigate adversarial patch attacks for cargo-occupancy estimation in physically realistic, fully simulated 3D scenes.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration to change it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19248v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19248v1" target="_blank" rel="noopener noreferrer">
                FedPoisonTTP：联邦测试时个性化的威胁模型与投毒攻击
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FedPoisonTTP: A Threat Model and Poisoning Attack for Federated Test-Time Personalization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Md Akil Raihan Iftee, Syed Md. Ahnaf Hasan, Amin Ahsan Ali, AKM Mahbubur Rahman,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文明确涉及联邦学习和安全/隐私主题（投毒攻击），这些都属于明确排除的无关主题。论文标题直接聚焦于联邦学习环境中的安全威胁和攻击方法，与推荐系统、搜索或广告的核心技术进展、LLM应用或Transformer架构改进没有任何关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 16:02:01
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19248v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19248v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CR</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Test-time personalization in federated learning enables models at clients to adjust online to local domain shifts, enhancing robustness and personalization in deployment. Yet, existing federated learning work largely overlooks the security risks that arise when local adaptation occurs at test time. Heterogeneous domain arrivals, diverse adaptation algorithms, and limited cross-client visibility create vulnerabilities where compromised participants can craft poisoned inputs and submit adversarial updates that undermine both global and per-client performance. To address this threat, we introduce FedPoisonTTP, a realistic grey-box attack framework that explores test-time data poisoning in the federated adaptation setting. FedPoisonTTP distills a surrogate model from adversarial queries, synthesizes in-distribution poisons using feature-consistency, and optimizes attack objectives to generate high-entropy or class-confident poisons that evade common adaptation filters. These poisons are injected during local adaptation and spread through collaborative updates, leading to broad degradation. Extensive experiments on corrupted vision benchmarks show that compromised participants can substantially diminish overall test-time performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19229v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19229v1" target="_blank" rel="noopener noreferrer">
                学习即插即用记忆以指导视频扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Learning Plug-and-play Memory for Guiding Video Diffusion Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Selena Song, Ziming Xu, Zijun Zhang, Kun Zhou, Jiaxian Guo, Lianhui Qin, Biwei H...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成领域的扩散模型技术，属于纯粹的视觉内容生成范畴。虽然提到了记忆机制，但这是针对视频序列的特定应用，与推荐系统、搜索或广告中的异构数据处理没有直接关联。论文的技术方向与我的关注领域（推荐系统、搜索、广告及其中应用的LLM/Transformer技术）完全不相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 15:42:23
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19229v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19229v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Diffusion Transformer(DiT) based video generation models have recently achieved impressive visual quality and temporal coherence, but they still frequently violate basic physical laws and commonsense dynamics, revealing a lack of explicit world knowledge. In this work, we explore how to equip them with a plug-and-play memory that injects useful world knowledge. Motivated by in-context memory in Transformer-based LLMs, we conduct empirical studies to show that DiT can be steered via interventions on its hidden states, and simple low-pass and high-pass filters in the embedding space naturally disentangle low-level appearance and high-level physical/semantic cues, enabling targeted guidance. Building on these observations, we propose a learnable memory encoder DiT-Mem, composed of stacked 3D CNNs, low-/high-pass filters, and self-attention layers. The encoder maps reference videos into a compact set of memory tokens, which are concatenated as the memory within the DiT self-attention layers. During training, we keep the diffusion backbone frozen, and only optimize the memory encoder. It yields a rather efficient training process on few training parameters (150M) and 10K data samples, and enables plug-and-play usage at inference time. Extensive experiments on state-of-the-art models demonstrate the effectiveness of our method in improving physical rule following and video fidelity. Our code and data are publicly released here: https://thrcle421.github.io/DiT-Mem-Web/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19221v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19221v1" target="_blank" rel="noopener noreferrer">
                Percept-WAM：面向鲁棒端到端自动驾驶的感知增强型世界认知-行动模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jianhua Han, Meng Tian, Jiangtong Zhu, Fan He, Huixin Zhang, Sitong Guo, Dechang...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域，这是一个与推荐系统、搜索或广告完全不同的应用领域。虽然提到了感知增强和世界认知模型，但这些技术主要针对物理世界理解和车辆控制，没有明显的潜在应用可以转移到推荐系统、搜索或广告领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 15:28:25
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19221v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19221v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Autonomous driving heavily relies on accurate and robust spatial perception. Many failures arise from inaccuracies and instability, especially in long-tail scenarios and complex interactions. However, current vision-language models are weak at spatial grounding and understanding, and VLA systems built on them therefore show limited perception and localization ability. To address these challenges, we introduce Percept-WAM, a perception-enhanced World-Awareness-Action Model that is the first to implicitly integrate 2D/3D scene understanding abilities within a single vision-language model (VLM). Instead of relying on QA-style spatial reasoning, Percept-WAM unifies 2D/3D perception tasks into World-PV and World-BEV tokens, which encode both spatial coordinates and confidence. We propose a grid-conditioned prediction mechanism for dense object perception, incorporating IoU-aware scoring and parallel autoregressive decoding, improving stability in long-tail, far-range, and small-object scenarios. Additionally, Percept-WAM leverages pretrained VLM parameters to retain general intelligence (e.g., logical reasoning) and can output perception results and trajectory control outputs directly. Experiments show that Percept-WAM matches or surpasses classical detectors and segmenters on downstream perception benchmarks, achieving 51.7/58.9 mAP on COCO 2D detection and nuScenes BEV 3D detection. When integrated with trajectory decoders, it further improves planning performance on nuScenes and NAVSIM, e.g., surpassing DiffusionDrive by 2.1 in PMDS on NAVSIM. Qualitative results further highlight its strong open-vocabulary and long-tail generalization.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19220v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19220v1" target="_blank" rel="noopener noreferrer">
                大型视觉语言模型是否真正基于医学图像？来自意大利临床视觉问答的证据
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Are Large Vision Language Models Truly Grounded in Medical Images? Evidence from Italian Clinical Visual Question Answering
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Federico Felizzi, Olivia Riccomi, Michele Ferramola, Francesco Andrea Causio, Ma...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的视觉问答应用，属于明确的医学领域特定应用，这在我的不相关主题列表中明确排除。虽然涉及视觉语言模型，但其医学图像和临床应用的焦点使其与推荐系统、搜索或广告领域完全无关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 15:26:58
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19220v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19220v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large vision language models (VLMs) have achieved impressive performance on medical visual question answering benchmarks, yet their reliance on visual information remains unclear. We investigate whether frontier VLMs demonstrate genuine visual grounding when answering Italian medical questions by testing four state-of-the-art models: Claude Sonnet 4.5, GPT-4o, GPT-5-mini, and Gemini 2.0 flash exp. Using 60 questions from the EuropeMedQA Italian dataset that explicitly require image interpretation, we substitute correct medical images with blank placeholders to test whether models truly integrate visual and textual information. Our results reveal striking variability in visual dependency: GPT-4o shows the strongest visual grounding with a 27.9pp accuracy drop (83.2% [74.6%, 91.7%] to 55.3% [44.1%, 66.6%]), while GPT-5-mini, Gemini, and Claude maintain high accuracy with modest drops of 8.5pp, 2.4pp, and 5.6pp respectively. Analysis of model-generated reasoning reveals confident explanations for fabricated visual interpretations across all models, suggesting varying degrees of reliance on textual shortcuts versus genuine visual analysis. These findings highlight critical differences in model robustness and the need for rigorous evaluation before clinical deployment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19202v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19202v1" target="_blank" rel="noopener noreferrer">
                NVGS：用于3D高斯泼溅中遮挡剔除的神经可见性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Brent Zoomers, Florian Hahlbohm, Joni Vanherck, Lode Jorissen, Marcus Magnor, Ni...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D计算机视觉中的遮挡剔除技术，属于纯粹的3D视觉领域。虽然3D高斯泼溅是计算机图形学的前沿技术，但论文内容与推荐系统、搜索或广告的排名和建模需求没有直接关联，也不涉及LLM或Transformer架构的进步。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 15:11:12
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19202v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19202v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.GR</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                3D Gaussian Splatting can exploit frustum culling and level-of-detail strategies to accelerate rendering of scenes containing a large number of primitives. However, the semi-transparent nature of Gaussians prevents the application of another highly effective technique: occlusion culling. We address this limitation by proposing a novel method to learn the viewpoint-dependent visibility function of all Gaussians in a trained model using a small, shared MLP across instances of an asset in a scene. By querying it for Gaussians within the viewing frustum prior to rasterization, our method can discard occluded primitives during rendering. Leveraging Tensor Cores for efficient computation, we integrate these neural queries directly into a novel instanced software rasterizer. Our approach outperforms the current state of the art for composed scenes in terms of VRAM usage and image quality, utilizing a combination of our instanced rasterizer and occlusion culling MLP, and exhibits complementary properties to existing LoD techniques.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19198v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19198v1" target="_blank" rel="noopener noreferrer">
                基于人工神经网络的三维解剖数据生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Three-Dimensional Anatomical Data Generation Based on Artificial Neural Networks
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ann-Sophia Müller, Moonkwang Jeong, Meng Zhang, Jiyuan Tian, Arkadiusz Miernik, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的三维解剖数据生成，属于明确的无关主题范畴。虽然涉及神经网络技术，但其应用领域（医学解剖）与搜索、推荐、广告系统完全无关，且不包含任何可能应用于这些领域的潜在技术迁移。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 15:07:45
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19198v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19198v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Surgical planning and training based on machine learning requires a large amount of 3D anatomical models reconstructed from medical imaging, which is currently one of the major bottlenecks. Obtaining these data from real patients and during surgery is very demanding, if even possible, due to legal, ethical, and technical challenges. It is especially difficult for soft tissue organs with poor imaging contrast, such as the prostate. To overcome these challenges, we present a novel workflow for automated 3D anatomical data generation using data obtained from physical organ models. We additionally use a 3D Generative Adversarial Network (GAN) to obtain a manifold of 3D models useful for other downstream machine learning tasks that rely on 3D data. We demonstrate our workflow using an artificial prostate model made of biomimetic hydrogels with imaging contrast in multiple zones. This is used to physically simulate endoscopic surgery. For evaluation and 3D data generation, we place it into a customized ultrasound scanner that records the prostate before and after the procedure. A neural network is trained to segment the recorded ultrasound images, which outperforms conventional, non-learning-based computer vision techniques in terms of intersection over union (IoU). Based on the segmentations, a 3D mesh model is reconstructed, and performance feedback is provided.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19187v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19187v1" target="_blank" rel="noopener noreferrer">
                SpectraNet：用于深度伪造人脸检测的FFT辅助深度学习分类器
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SpectraNet: FFT-assisted Deep Learning Classifier for Deepfake Face Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nithira Jayarathne, Naveen Basnayake, Keshawa Jayasundara, Pasindu Dodampegama, ...
        </div>

        <!-- NOTE(review): the recommendation-reason paragraph below is empty in this generated output, unlike the sibling cards; left as-is rather than inventing text. Presumably the upstream generator returned no reason for this paper - TODO confirm. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1"></p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 14:54:00
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19187v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19187v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Detecting deepfake images is crucial in combating misinformation. We present a lightweight, generalizable binary classification model based on EfficientNet-B6, fine-tuned with transformation techniques to address severe class imbalances. By leveraging robust preprocessing, oversampling, and optimization strategies, our model achieves high accuracy, stability, and generalization. While incorporating Fourier transform-based phase and amplitude features showed minimal impact, our proposed framework helps non-experts to effectively identify deepfake images, making significant strides toward accessible and reliable deepfake detection.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2511.19183v1 (score 1/10); carries collapsed-level-2, which the page stylesheet sets to display: none. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19183v1" target="_blank" rel="noopener noreferrer">
                nnActive：3D生物医学分割中主动学习评估框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: English title, authors, recommendation reason, metadata row, abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            nnActive: A Framework for Evaluation of Active Learning in 3D Biomedical Segmentation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Carsten T. Lüth, Jeremias Traub, Kim-Celine Kahl, Till J. Bungert, Lukas Klein, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D生物医学分割领域的主动学习评估框架，属于医学和生物医学应用领域。根据您的关注点，这属于明确的无关主题，与推荐系统、搜索、广告或相关LLM技术没有任何关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-11-24 14:50:36
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19183v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19183v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Semantic segmentation is crucial for various biomedical applications, yet its reliance on large annotated datasets presents a bottleneck due to the high cost and specialized expertise required for manual labeling. Active Learning (AL) aims to mitigate this challenge by querying only the most informative samples, thereby reducing annotation effort. However, in the domain of 3D biomedical imaging, there is no consensus on whether AL consistently outperforms Random sampling. Four evaluation pitfalls hinder the current methodological assessment. These are (1) restriction to too few datasets and annotation budgets, (2) using 2D models on 3D images without partial annotations, (3) Random baseline not being adapted to the task, and (4) measuring annotation cost only in voxels. In this work, we introduce nnActive, an open-source AL framework that overcomes these pitfalls by (1) means of a large scale study spanning four biomedical imaging datasets and three label regimes, (2) extending nnU-Net by using partial annotations for training with 3D patch-based query selection, (3) proposing Foreground Aware Random sampling strategies tackling the foreground-background class imbalance of medical images and (4) propose the foreground efficiency metric, which captures the low annotation cost of background-regions. We reveal the following findings: (A) while all AL methods outperform standard Random sampling, none reliably surpasses an improved Foreground Aware Random sampling; (B) benefits of AL depend on task specific parameters; (C) Predictive Entropy is overall the best performing AL method, but likely requires the most annotation effort; (D) AL performance can be improved with more compute intensive design choices. As a holistic, open-source framework, nnActive can serve as a catalyst for research and application of AL in 3D biomedical imaging. Code is at: https://github.com/MIC-DKFZ/nnActive
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19180v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19180v1" target="_blank" rel="noopener noreferrer">
                评估用于源相机识别的深度学习和传统方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Evaluating Deep Learning and Traditional Approaches Used in Source Camera Identification
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mansur Ozaman
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于源相机识别，这属于数字取证和图像安全领域，与推荐系统、搜索或广告的核心技术完全无关。论文内容涉及指纹识别和安全分析，这些都属于明确排除的无关主题范畴。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T14:42:44">2025-11-24 14:42:44</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19180v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19180v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                One of the most important tasks in computer vision is identifying the device using which the image was taken, useful for facilitating further comprehensive analysis of the image. This paper presents comparative analysis of three techniques used in source camera identification (SCI): Photo Response Non-Uniformity (PRNU), JPEG compression artifact analysis, and convolutional neural networks (CNNs). It evaluates each method in terms of device classification accuracy. Furthermore, the research discusses the possible scientific development needed for the implementation of the methods in real-life scenarios.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19172v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19172v1" target="_blank" rel="noopener noreferrer">
                MetroGS：高效稳定地重建几何精确的高保真大规模场景
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MetroGS: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes
        </div>

        <!-- Author list (already truncated with an ellipsis in the source data). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kehua Chen, Tianlu Mao, Zhuxin Ma, Hao Jiang, Zehao Li, Zihan Liu, Shuqi Gao, Ho...
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D场景重建和计算机图形学，属于纯粹的视觉/3D视觉领域。虽然涉及大规模场景处理，但缺乏与推荐系统、搜索或广告相关的直接或间接应用场景。论文的技术内容主要围绕几何重建和渲染保真度，与我的核心关注领域没有明显关联。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T14:34:19">2025-11-24 14:34:19</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19172v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19172v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recently, 3D Gaussian Splatting and its derivatives have achieved significant breakthroughs in large-scale scene reconstruction. However, how to efficiently and stably achieve high-quality geometric fidelity remains a core challenge. To address this issue, we introduce MetroGS, a novel Gaussian Splatting framework for efficient and robust reconstruction in complex urban environments. Our method is built upon a distributed 2D Gaussian Splatting representation as the core foundation, serving as a unified backbone for subsequent modules. To handle potential sparse regions in complex scenes, we propose a structured dense enhancement scheme that utilizes SfM priors and a pointmap model to achieve a denser initialization, while incorporating a sparsity compensation mechanism to improve reconstruction completeness. Furthermore, we design a progressive hybrid geometric optimization strategy that organically integrates monocular and multi-view optimization to achieve efficient and accurate geometric refinement. Finally, to address the appearance inconsistency commonly observed in large-scale scenes, we introduce a depth-guided appearance modeling approach that learns spatial features with 3D consistency, facilitating effective decoupling between geometry and appearance and further enhancing reconstruction stability. Experiments on large-scale urban datasets demonstrate that MetroGS achieves superior geometric accuracy, rendering quality, offering a unified solution for high-fidelity large-scale scene reconstruction.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19137v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19137v1" target="_blank" rel="noopener noreferrer">
                FilmSceneDesigner：用于程序化电影场景生成的链式场景设计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FilmSceneDesigner: Chaining Set Design for Procedural Film Scene Generation
        </div>

        <!-- Author list (already truncated with an ellipsis in the source data). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhifeng Xie, Keyi Zhang, Yiye Yan, Yuling Guo, Fan Yang, Jiting Zhou, Mengtian L...
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于电影场景的程序化生成和设计，属于内容生成领域，与推荐系统、搜索或广告的核心技术无关。虽然涉及链式设计概念，但应用场景仅限于影视制作，没有明显的RecSys/Search/Ads应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T14:00:40">2025-11-24 14:00:40</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19137v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19137v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Film set design plays a pivotal role in cinematic storytelling and shaping the visual atmosphere. However, the traditional process depends on expert-driven manual modeling, which is labor-intensive and time-consuming. To address this issue, we introduce FilmSceneDesigner, an automated scene generation system that emulates professional film set design workflow. Given a natural language description, including scene type, historical period, and style, we design an agent-based chaining framework to generate structured parameters aligned with film set design workflow, guided by prompt strategies that ensure parameter accuracy and coherence. On the other hand, we propose a procedural generation pipeline which executes a series of dedicated functions with the structured parameters for floorplan and structure generation, material assignment, door and window placement, and object retrieval and layout, ultimately constructing a complete film scene from scratch. Moreover, to enhance cinematic realism and asset diversity, we construct SetDepot-Pro, a curated dataset of 6,862 film-specific 3D assets and 733 materials. Experimental results and human evaluations demonstrate that our system produces structurally sound scenes with strong cinematic fidelity, supporting downstream tasks such as virtual previs, construction drawing and mood board creation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19134v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19134v1" target="_blank" rel="noopener noreferrer">
                MambaRefine-YOLO：一种用于无人机图像的双模态小目标检测器
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MambaRefine-YOLO: A Dual-Modality Small Object Detector for UAV Imagery
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuyu Cao, Minxin Chen, Yucheng Song, Zhaozhong Chen, Xinyou Zhang
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于无人机图像中的小目标检测，属于纯粹的计算机视觉领域，与推荐系统、搜索或广告没有直接关联。虽然提到了双模态，但这指的是视觉模态内的不同特征，而非推荐系统中常见的异构数据融合。论文内容完全落在被排除的纯粹视觉研究范畴内。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T13:59:01">2025-11-24 13:59:01</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19134v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19134v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Small object detection in Unmanned Aerial Vehicle (UAV) imagery is a persistent challenge, hindered by low resolution and background clutter. While fusing RGB and infrared (IR) data offers a promising solution, existing methods often struggle with the trade-off between effective cross-modal interaction and computational efficiency. In this letter, we introduce MambaRefine-YOLO. Its core contributions are a Dual-Gated Complementary Mamba fusion module (DGC-MFM) that adaptively balances RGB and IR modalities through illumination-aware and difference-aware gating mechanisms, and a Hierarchical Feature Aggregation Neck (HFAN) that uses a ``refine-then-fuse'' strategy to enhance multi-scale features. Our comprehensive experiments validate this dual-pronged approach. On the dual-modality DroneVehicle dataset, the full model achieves a state-of-the-art mAP of 83.2%, an improvement of 7.9% over the baseline. On the single-modality VisDrone dataset, a variant using only the HFAN also shows significant gains, demonstrating its general applicability. Our work presents a superior balance between accuracy and speed, making it highly suitable for real-world UAV applications.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19117v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19117v1" target="_blank" rel="noopener noreferrer">
                3M-TI：通过免校准多摄像头跨模态扩散实现高质量移动热成像
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Minchong Chen, Xiaoyun Yuan, Junzhe Wan, Jianing Zhang, Jun Zhang
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于移动热成像技术，涉及计算机视觉和传感器融合领域。虽然提到了跨模态扩散，但这与推荐系统、搜索或广告的核心技术需求无关，也不涉及Transformer架构改进或LLM技术。该技术主要面向视觉感知应用，没有明显的推荐/搜索/广告应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T13:48:47">2025-11-24 13:48:47</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19117v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19117v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">physics.optics</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The miniaturization of thermal sensors for mobile platforms inherently limits their spatial resolution and textural fidelity, leading to blurry and less informative images. Existing thermal super-resolution (SR) methods can be grouped into single-image and RGB-guided approaches: the former struggles to recover fine structures from limited information, while the latter relies on accurate and laborious cross-camera calibration, which hinders practical deployment and robustness. Here, we propose 3M-TI, a calibration-free Multi-camera cross-Modality diffusion framework for Mobile Thermal Imaging. At its core, 3M-TI integrates a cross-modal self-attention module (CSM) into the diffusion UNet, replacing the original self-attention layers to adaptively align thermal and RGB features throughout the denoising process, without requiring explicit camera calibration. This design enables the diffusion network to leverage its generative prior to enhance spatial resolution, structural fidelity, and texture detail in the super-resolved thermal images. Extensive evaluations on real-world mobile thermal cameras and public benchmarks validate our superior performance, achieving state-of-the-art results in both visual quality and quantitative metrics. More importantly, the thermal images enhanced by 3M-TI lead to substantial gains in critical downstream tasks like object detection and segmentation, underscoring its practical value for robust mobile thermal perception systems. More materials: https://github.com/work-submit/3MTI.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19111v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19111v1" target="_blank" rel="noopener noreferrer">
                DiffSeg30k：面向局部化AIGC检测的多轮扩散编辑基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DiffSeg30k: A Multi-Turn Diffusion Editing Benchmark for Localized AIGC Detection
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hai Ci, Ziheng Peng, Pei Yang, Yingxin Xuan, Mike Zheng Shou
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于AIGC检测基准构建，属于内容生成和检测领域，与我的核心关注点（推荐系统、搜索、广告中的排名算法、LLM应用、Transformer架构等）无关。论文涉及扩散模型编辑和AIGC检测，这些都属于纯粹的AIGC和内容生成范畴，没有明确的推荐/搜索/广告应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T13:43:54">2025-11-24 13:43:54</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19111v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19111v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Diffusion-based editing enables realistic modification of local image regions, making AI-generated content harder to detect. Existing AIGC detection benchmarks focus on classifying entire images, overlooking the localization of diffusion-based edits. We introduce DiffSeg30k, a publicly available dataset of 30k diffusion-edited images with pixel-level annotations, designed to support fine-grained detection. DiffSeg30k features: 1) In-the-wild images--we collect images or image prompts from COCO to reflect real-world content diversity; 2) Diverse diffusion models--local edits using eight SOTA diffusion models; 3) Multi-turn editing--each image undergoes up to three sequential edits to mimic real-world sequential editing; and 4) Realistic editing scenarios--a vision-language model (VLM)-based pipeline automatically identifies meaningful regions and generates context-aware prompts covering additions, removals, and attribute changes. DiffSeg30k shifts AIGC detection from binary classification to semantic segmentation, enabling simultaneous localization of edits and identification of the editing models. We benchmark three baseline segmentation approaches, revealing significant challenges in semantic segmentation tasks, particularly concerning robustness to image distortions. Experiments also reveal that segmentation models, despite being trained for pixel-level localization, emerge as highly reliable whole-image classifiers of diffusion edits, outperforming established forgery classifiers while showing great potential in cross-generator generalization. We believe DiffSeg30k will advance research in fine-grained localization of AI-generated content by demonstrating the promise and limitations of segmentation-based methods. DiffSeg30k is released at: https://huggingface.co/datasets/Chaos2629/Diffseg30k
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19109v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19109v1" target="_blank" rel="noopener noreferrer">
                HABIT：CARLA中交互交通的人类行为基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            HABIT: Human Action Benchmark for Interactive Traffic in CARLA
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mohan Ramesh, Mark Azer, Fabian B. Flohr
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确聚焦于自动驾驶领域的交互交通基准测试，使用CARLA模拟器进行人类行为评估。这与推荐系统、搜索或广告的核心领域完全无关，也不涉及LLM技术、Transformer架构或异构数据建模。该研究属于纯粹的自动驾驶/交通模拟领域，没有任何潜在的应用于RecSys/Search/Ads的可能性。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T13:43:39">2025-11-24 13:43:39</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19109v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19109v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Current autonomous driving (AD) simulations are critically limited by their inadequate representation of realistic and diverse human behavior, which is essential for ensuring safety and reliability. Existing benchmarks often simplify pedestrian interactions, failing to capture complex, dynamic intentions and varied responses critical for robust system deployment. To overcome this, we introduce HABIT (Human Action Benchmark for Interactive Traffic), a high-fidelity simulation benchmark. HABIT integrates real-world human motion, sourced from mocap and videos, into CARLA (Car Learning to Act, a full autonomous driving simulator) via a modular, extensible, and physically consistent motion retargeting pipeline. From an initial pool of approximately 30,000 retargeted motions, we curate 4,730 traffic-compatible pedestrian motions, standardized in SMPL format for physically consistent trajectories. HABIT seamlessly integrates with CARLA's Leaderboard, enabling automated scenario generation and rigorous agent evaluation. Our safety metrics, including Abbreviated Injury Scale (AIS) and False Positive Braking Rate (FPBR), reveal critical failure modes in state-of-the-art AD agents missed by prior evaluations. Evaluating three state-of-the-art autonomous driving agents, InterFuser, TransFuser, and BEVDriver, demonstrates how HABIT exposes planner weaknesses that remain hidden in scripted simulations. Despite achieving close or equal to zero collisions per kilometer on the CARLA Leaderboard, the autonomous agents perform notably worse on HABIT, with up to 7.43 collisions/km and a 12.94% AIS 3+ injury risk, and they brake unnecessarily in up to 33% of cases. All components are publicly released to support reproducible, pedestrian-aware AI research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2511.19105v1. The collapsed-level-2 class maps to display: none !important in the page-level style block. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19105v1" target="_blank" rel="noopener noreferrer">
                基于WiFi信号的图结构三维人体姿态估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN for this text. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Graph-based 3D Human Pose Estimation using WiFi Signals
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jichao Chen, YangYang Qu, Ruibo Tang, Dirk Slock
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于使用WiFi信号进行3D人体姿态估计，这属于计算机视觉和传感技术的交叉领域。虽然采用了图结构方法，但核心应用场景（人体姿态估计）与推荐系统、搜索或广告的排名任务没有直接关联，也不涉及LLM或Transformer架构的进展。</p>
        </div>

        <!-- Metadata row: timestamp (no timezone is given in the data, so none is asserted), arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T13:40:26">2025-11-24 13:40:26</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.19105v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19105v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                WiFi-based human pose estimation (HPE) has attracted increasing attention due to its resilience to occlusion and privacy-preserving compared to camera-based methods. However, existing WiFi-based HPE approaches often employ regression networks that directly map WiFi channel state information (CSI) to 3D joint coordinates, ignoring the inherent topological relationships among human joints. In this paper, we present GraphPose-Fi, a graph-based framework that explicitly models skeletal topology for WiFi-based 3D HPE. Our framework comprises a CNN encoder shared across antennas for subcarrier-time feature extraction, a lightweight attention module that adaptively reweights features over time and across antennas, and a graph-based regression head that combines GCN layers with self-attention to capture local topology and global dependencies. Our proposed method significantly outperforms existing methods on the MM-Fi dataset in various settings. The source code is available at: https://github.com/Cirrick/GraphPose-Fi.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19071v1" target="_blank" rel="noopener noreferrer">
                DEAP-3DSAM：用于3D医学图像分割的解码器增强与自动提示SAM
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            DEAP-3DSAM: Decoder Enhanced and Auto Prompt SAM for 3D Medical Image Segmentation
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fangda Chen, Jintao Tang, Pancheng Wang, Ting Wang, Shasha Li, Ting Deng
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D医学图像分割，属于明确的医学领域应用，与RecSys、搜索或广告无关。虽然提到了SAM（Segment Anything Model），但应用于医学图像这一特定领域，且没有展示任何在推荐系统、搜索或广告中的潜在应用价值。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T13:07:22">2025-11-24 13:07:22</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19071v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19071v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The Segment Anything Model (SAM) has recently demonstrated significant potential in medical image segmentation. Although SAM is primarily trained on 2D images, attempts have been made to apply it to 3D medical image segmentation. However, the pseudo 3D processing used to adapt SAM results in spatial feature loss, limiting its performance. Additionally, most SAM-based methods still rely on manual prompts, which are challenging to implement in real-world scenarios and require extensive external expert knowledge. To address these limitations, we introduce the Decoder Enhanced and Auto Prompt SAM (DEAP-3DSAM) to tackle these limitations. Specifically, we propose a Feature Enhanced Decoder that fuses the original image features with rich and detailed spatial information to enhance spatial features. We also design a Dual Attention Prompter to automatically obtain prompt information through Spatial Attention and Channel Attention. We conduct comprehensive experiments on four public abdominal tumor segmentation datasets. The results indicate that our DEAP-3DSAM achieves state-of-the-art performance in 3D image segmentation, outperforming or matching existing manual prompt methods. Furthermore, both quantitative and qualitative ablation studies confirm the effectiveness of our proposed modules.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19065v1" target="_blank" rel="noopener noreferrer">
                理解、加速和改进MeanFlow训练
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            Understanding, Accelerating, and Improving MeanFlow Training
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jin-Young Kim, Hyojun Go, Lea Bogensperger, Julius Erbach, Nikolai Kalischek, Fe...
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题涉及MeanFlow训练方法，这似乎是流体动力学或物理模拟领域的特定技术，与推荐系统、搜索或广告没有明显关联。标题中没有提到任何与Transformer架构、LLM技术或推荐系统相关的关键词，因此无法识别出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T12:59:27">2025-11-24 12:59:27</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19065v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19065v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    MeanFlow promises high-quality generative modeling in few steps, by jointly learning instantaneous and average velocity fields. Yet, the underlying training dynamics remain unclear. We analyze the interaction between the two velocities and find: (i) well-established instantaneous velocity is a prerequisite for learning average velocity; (ii) learning of instantaneous velocity benefits from average velocity when the temporal gap is small, but degrades as the gap increases; and (iii) task-affinity analysis indicates that smooth learning of large-gap average velocities, essential for one-step generation, depends on the prior formation of accurate instantaneous and small-gap average velocities. Guided by these observations, we design an effective training scheme that accelerates the formation of instantaneous velocity, then shifts emphasis from short- to long-interval average velocity. Our enhanced MeanFlow training yields faster convergence and significantly better few-step generation: With the same DiT-XL backbone, our method reaches an impressive FID of 2.87 on 1-NFE ImageNet 256x256, compared to 3.43 for the conventional MeanFlow baseline. Alternatively, our method matches the performance of the MeanFlow baseline with 2.5x shorter training time, or with a smaller DiT-L backbone.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19057v1" target="_blank" rel="noopener noreferrer">
                LAA3D：三维空间中低空飞行器检测与跟踪基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            LAA3D: A Benchmark of Detecting and Tracking Low-Altitude Aircraft in 3D Space
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hai Wu, Shuai Tang, Jiale Wang, Longkun Zou, Mingyue Guo, Rongqin Liang, Ke Chen...
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于三维空间中低空飞行器的检测与跟踪基准，属于纯粹的计算机视觉和3D视觉领域。虽然检测和跟踪技术在某些场景下可能具有通用性，但该论文明确聚焦于航空器这一特定领域应用，与推荐系统、搜索或广告的核心技术栈没有任何直接关联，也不涉及任何Transformer架构或LLM技术的进展。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T12:50:34">2025-11-24 12:50:34</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19057v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19057v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Perception of Low-Altitude Aircraft (LAA) in 3D space enables precise 3D object localization and behavior understanding. However, datasets tailored for 3D LAA perception remain scarce. To address this gap, we present LAA3D, a large-scale dataset designed to advance 3D detection and tracking of low-altitude aerial vehicles. LAA3D contains 15,000 real images and 600,000 synthetic frames, captured across diverse scenarios, including urban and suburban environments. It covers multiple aerial object categories, including electric Vertical Take-Off and Landing (eVTOL) aircraft, Micro Aerial Vehicles (MAVs), and Helicopters. Each instance is annotated with 3D bounding box, class label, and instance identity, supporting tasks such as 3D object detection, 3D multi-object tracking (MOT), and 6-DoF pose estimation. Besides, we establish the LAA3D Benchmark, integrating multiple tasks and methods with unified evaluation protocols for comparison. Furthermore, we propose MonoLAA, a monocular 3D detection baseline, achieving robust 3D localization from zoom cameras with varying focal lengths. Models pretrained on synthetic images transfer effectively to real-world data with fine-tuning, demonstrating strong sim-to-real generalization. Our LAA3D provides a comprehensive foundation for future research in low-altitude 3D object perception.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19046v1" target="_blank" rel="noopener noreferrer">
                MedSAM3：深入探索基于医学概念的通用分割模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            MedSAM3: Delving into Segment Anything with Medical Concepts
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Anglin Liu, Rundong Xue, Xu R. Cao, Yifan Shen, Yi Lu, Xiang Li, Qianqian Chen, ...
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像分割领域，属于明确的医学应用范畴。虽然标题提到'Segment Anything'概念，但具体应用于医学领域，与推荐系统、搜索或广告的技术焦点完全无关。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T12:34:38">2025-11-24 12:34:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19046v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19046v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Medical image segmentation is fundamental for biomedical discovery. Existing methods lack generalizability and demand extensive, time-consuming manual annotation for new clinical application. Here, we propose MedSAM-3, a text promptable medical segmentation model for medical image and video segmentation. By fine-tuning the Segment Anything Model (SAM) 3 architecture on medical images paired with semantic conceptual labels, our MedSAM-3 enables medical Promptable Concept Segmentation (PCS), allowing precise targeting of anatomical structures via open-vocabulary text descriptions rather than solely geometric prompts. We further introduce the MedSAM-3 Agent, a framework that integrates Multimodal Large Language Models (MLLMs) to perform complex reasoning and iterative refinement in an agent-in-the-loop workflow. Comprehensive experiments across diverse medical imaging modalities, including X-ray, MRI, Ultrasound, CT, and video, demonstrate that our approach significantly outperforms existing specialist and foundation models. We will release our code and model at https://github.com/Joey-S-Liu/MedSAM3.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19035v1" target="_blank" rel="noopener noreferrer">
                CSD：仅使用语义变化掩码进行变化语义检测，用于冲突区域的损害评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            CSD: Change Semantic Detection with only Semantic Change Masks for Damage Assessment in Conflict Zones
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kai Zhenga, Zhenkai Wu, Fupeng Wei, Miaolan Zhou, Kai Lie, Haitao Guo, Lei Ding,...
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及冲突区域的损害评估和语义变化检测，属于计算机视觉应用领域。与我的关注点（推荐系统、搜索、广告中的LLM应用、Transformer架构进展或异构数据统一建模）没有直接关联。该技术主要面向地理空间分析和灾害评估，缺乏在推荐、搜索或广告领域的潜在应用。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T12:16:21">2025-11-24 12:16:21</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19035v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19035v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Accurately and swiftly assessing damage from conflicts is crucial for humanitarian aid and regional stability. In conflict zones, damaged zones often share similar architectural styles, with damage typically covering small areas and exhibiting blurred boundaries. These characteristics lead to limited data, annotation difficulties, and significant recognition challenges, including high intra-class similarity and ambiguous semantic changes. To address these issues, we introduce a pre-trained DINOv3 model and propose a multi-scale cross-attention difference siamese network (MC-DiSNet). The powerful visual representation capability of the DINOv3 backbone enables robust and rich feature extraction from bi-temporal remote sensing images. We also release a new Gaza-change dataset containing high-resolution satellite image pairs from 2023-2024 with pixel-level semantic change annotations. It is worth emphasizing that our annotations only include semantic pixels of changed areas. Unlike conventional semantic change detection (SCD), our approach eliminates the need for large-scale semantic annotations of bi-temporal images, instead focusing directly on the changed regions. We term this new task change semantic detection (CSD). The CSD task represents a direct extension of binary change detection (BCD). Due to the limited spatial extent of semantic regions, it presents greater challenges than traditional SCD tasks. We evaluated our method under the CSD framework on both the Gaza-Change and SECOND datasets. Experimental results demonstrate that our proposed approach effectively addresses the CSD task, and its outstanding performance paves the way for practical applications in rapid damage assessment across conflict zones.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.19033v1" target="_blank" rel="noopener noreferrer">
                ReEXplore：通过情境化回顾经验重放改进多模态大语言模型在具身探索中的表现
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            ReEXplore: Improving MLLMs for Embodied Exploration with Contextualized Retrospective Experience Replay
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Gengyuan Zhang, Mingcong Ding, Jingpei Wu, Ruotong Liao, Volker Tresp
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于多模态大语言模型在具身探索（embodied exploration）中的应用，这属于机器人学和具身智能领域，与推荐系统、搜索或广告的核心技术焦点无关。论文标题中提到的经验重放技术主要针对强化学习环境中的智能体训练，没有显示出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T12:13:05">2025-11-24 12:13:05</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.19033v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.19033v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Embodied exploration is a target-driven process that requires embodied agents to possess fine-grained perception and knowledge-enhanced decision making. While recent attempts leverage MLLMs for exploration due to their strong perceptual and reasoning abilities, we find that MLLM-based embodied agents remain suboptimal in exploring new environments: (i) they rely on profound but stale pre-trained knowledge, (ii) training-based approaches such as imitation learning or reinforcement learning are expensive for long-horizon tasks with sparse outcome rewards, and (iii) frontier-based exploration yields a large, visually nuanced action space that is difficult for MLLMs to make reliable decisions. We address these challenges with ReEXplore, a training-free framework that performs retrospective experience replay to inject distilled, abstract experience at inference time, and hierarchical frontier selection to decompose frontier ranking into coarse-to-fine decisions. Our approach enables robust, traceable, and efficient exploration. Across multiple embodied exploration benchmarks, ReEXplore yields great improvements over strong MLLM baselines, up to 3x higher performance in both success rate and in navigation efficiency under open-source backbones.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18993v1" target="_blank" rel="noopener noreferrer">
                AuViRe：用于深度伪造时序定位的视听语音表示重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            AuViRe: Audio-visual Speech Representation Reconstruction for Deepfake Temporal Localization
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Christos Koutlis, Symeon Papadopoulos
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于音频-视觉深度伪造检测和时序定位，属于多媒体取证领域。虽然涉及多模态建模，但主要应用于安全检测而非推荐、搜索或广告系统。论文内容与用户关注的LLM技术、推荐系统架构或Transformer改进等核心方向没有直接关联。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T11:19:21">2025-11-24 11:19:21</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18993v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18993v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    With the rapid advancement of sophisticated synthetic audio-visual content, e.g., for subtle malicious manipulations, ensuring the integrity of digital media has become paramount. This work presents a novel approach to temporal localization of deepfakes by leveraging Audio-Visual Speech Representation Reconstruction (AuViRe). Specifically, our approach reconstructs speech representations from one modality (e.g., lip movements) based on the other (e.g., audio waveform). Cross-modal reconstruction is significantly more challenging in manipulated video segments, leading to amplified discrepancies, thereby providing robust discriminative cues for precise temporal forgery localization. AuViRe outperforms the state of the art by +8.9 AP@0.95 on LAV-DF, +9.6 AP@0.5 on AV-Deepfake1M, and +5.1 AUC on an in-the-wild experiment. Code available at https://github.com/mever-team/auvire.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default placeholder text; set `customMade` and open the koroFileHeader configuration guide to customize: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge (star icon is decorative). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18989v1" target="_blank" rel="noopener noreferrer">
                重新思考植物病害诊断：通过视觉Transformer和零样本学习弥合学术与实践差距
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- English title. -->
        <div class="mb-2 text-base text-gray-700">
            Rethinking Plant Disease Diagnosis: Bridging the Academic-Practical Gap with Vision Transformers and Zero-Shot Learning
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wassim Benabbas, Mohammed Brahimi, Samir Akhrouf, Bilal Fortas
        </div>

        <!-- Recommendation rationale. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于植物病害诊断的计算机视觉应用，属于农业领域的特定应用。虽然提到了视觉Transformer技术，但其应用场景与推荐系统、搜索或广告领域完全无关。该研究属于纯粹的视觉应用，没有展示出在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Timestamp, arXiv link and category tags. Icon and "|" separators are decorative.
             The page states no time zone, so datetime is a local date-time without offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-11-24T11:08:01">2025-11-24 11:08:01</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2511.18989v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18989v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <!-- Native disclosure toggle for the abstract; the chevron icon is decorative. -->
                 <summary class="text-sm text-primary cursor-pointer">
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
                 </summary>
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advances in deep learning have enabled significant progress in plant disease classification using leaf images. Much of the existing research in this field has relied on the PlantVillage dataset, which consists of well-centered plant images captured against uniform, uncluttered backgrounds. Although models trained on this dataset achieve high accuracy, they often fail to generalize to real-world field images, such as those submitted by farmers to plant diagnostic systems. This has created a significant gap between published studies and practical application requirements, highlighting the necessity of investigating and addressing this issue. In this study, we investigate whether attention-based architectures and zero-shot learning approaches can bridge the gap between curated academic datasets and real-world agricultural conditions in plant disease classification. We evaluate three model categories: Convolutional Neural Networks (CNNs), Vision Transformers, and Contrastive Language-Image Pre-training (CLIP)-based zero-shot models. While CNNs exhibit limited robustness under domain shift, Vision Transformers demonstrate stronger generalization by capturing global contextual features. Most notably, CLIP models classify diseases directly from natural language descriptions without any task-specific training, offering strong adaptability and interpretability. These findings highlight the potential of zero-shot learning as a practical and scalable domain adaptation strategy for plant health diagnosis in diverse field environments.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18978v1" target="_blank" rel="noopener noreferrer">
                基于视觉语言基础模型对全切片图像中皮肤肿瘤的零样本分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            Zero-shot segmentation of skin tumors in whole-slide images with vision-language foundation models
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Santiago Moreno, Pablo Meseguer, Rocío del Amor, Valery Naranjo
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于医学图像分析中的皮肤肿瘤分割，属于医学/生物领域的特定应用。虽然提到了视觉语言基础模型，但其应用场景（皮肤肿瘤、全切片图像）与推荐系统、搜索或广告领域完全无关，且没有显示出任何在这些领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T10:50:30">2025-11-24 10:50:30</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18978v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18978v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Accurate annotation of cutaneous neoplasm biopsies represents a major challenge due to their wide morphological variability, overlapping histological patterns, and the subtle distinctions between benign and malignant lesions. Vision-language foundation models (VLMs), pre-trained on paired image-text corpora, learn joint representations that bridge visual features and diagnostic terminology, enabling zero-shot localization and classification of tissue regions without pixel-level labels. However, most existing VLM applications in histopathology remain limited to slide-level tasks or rely on coarse interactive prompts, and they struggle to produce fine-grained segmentations across gigapixel whole-slide images (WSIs). In this work, we introduce a zero-shot visual-language segmentation pipeline for whole-slide images (ZEUS), a fully automated, zero-shot segmentation framework that leverages class-specific textual prompt ensembles and frozen VLM encoders to generate high-resolution tumor masks in WSIs. By partitioning each WSI into overlapping patches, extracting visual embeddings, and computing cosine similarities against text prompts, we generate a final segmentation mask. We demonstrate competitive performance on two in-house datasets, primary spindle cell neoplasms and cutaneous metastases, highlighting the influence of prompt design, domain shifts, and institutional variability in VLMs for histopathology. ZEUS markedly reduces annotation burden while offering scalable, explainable tumor delineation for downstream diagnostic workflows.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18976v1" target="_blank" rel="noopener noreferrer">
                Peregrine：通用深度卷积网络全同态加密推理的单次微调
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            Peregrine: One-Shot Fine-Tuning for FHE Inference of General Deep CNNs
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Huaming Ling, Ying Wang, Si Chen, Junfeng Fan
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于全同态加密（FHE）推理优化，属于隐私保护技术范畴，属于明确排除的非技术性隐私主题。虽然涉及深度学习模型，但核心是加密推理而非推荐系统、搜索或广告的架构创新或应用。该技术没有明显的推荐/搜索/广告应用潜力，与当前关注点无关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T10:47:39">2025-11-24 10:47:39</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18976v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18976v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                We address two fundamental challenges in adapting general deep CNNs for FHE-based inference: approximating non-linear activations such as ReLU with low-degree polynomials while minimizing accuracy degradation, and overcoming the ciphertext capacity barrier that constrains high-resolution image processing on FHE inference. Our contributions are twofold: (1) a single-stage fine-tuning (SFT) strategy that directly converts pre-trained CNNs into FHE-friendly forms using low-degree polynomials, achieving competitive accuracy with minimal training overhead; and (2) a generalized interleaved packing (GIP) scheme that is compatible with feature maps of virtually arbitrary spatial resolutions, accompanied by a suite of carefully designed homomorphic operators that preserve the GIP-form encryption throughout computation. These advances enable efficient, end-to-end FHE inference across diverse CNN architectures. Experiments on CIFAR-10, ImageNet, and MS COCO demonstrate that the FHE-friendly CNNs obtained via our SFT strategy achieve accuracy comparable to baselines using ReLU or SiLU activations. Moreover, this work presents the first demonstration of FHE-based inference for YOLO architectures in object detection leveraging low-degree polynomial activations.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18968v1" target="_blank" rel="noopener noreferrer">
                CataractCompDetect：白内障手术中并发症检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            CataractCompDetect: Intraoperative Complication Detection in Cataract Surgery
        </div>

        <!-- Author list (already truncated with "..." in the generated data). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Bhuvan Sachdeva, Sneha Kumari, Rudransh Agarwal, Shalaka Kumaraswamy, Niharika S...
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确聚焦于医疗领域（白内障手术）的并发症检测，属于明确的医疗应用范畴。这与我的关注领域（推荐系统、搜索、广告）完全无关，且医疗应用被明确列为不相关主题。该技术没有任何潜在的应用于推荐系统、搜索或广告的可能性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T10:34:12">2025-11-24 10:34:12</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18968v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18968v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Cataract surgery is one of the most commonly performed surgeries worldwide, yet intraoperative complications such as iris prolapse, posterior capsule rupture (PCR), and vitreous loss remain major causes of adverse outcomes. Automated detection of such events could enable early warning systems and objective training feedback. In this work, we propose CataractCompDetect, a complication detection framework that combines phase-aware localization, SAM 2-based tracking, complication-specific risk scoring, and vision-language reasoning for final classification. To validate CataractCompDetect, we curate CataComp, the first cataract surgery video dataset annotated for intraoperative complications, comprising 53 surgeries, including 23 with clinical complications. On CataComp, CataractCompDetect achieves an average F1 score of 70.63%, with per-complication performance of 81.8% (Iris Prolapse), 60.87% (PCR), and 69.23% (Vitreous Loss). These results highlight the value of combining structured surgical priors with vision-language reasoning for recognizing rare but high-impact intraoperative events. Our dataset and code will be publicly released upon acceptance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18946v1" target="_blank" rel="noopener noreferrer">
                利用对抗学习实现虚拟染色中的病理保真度
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            Leveraging Adversarial Learning for Pathological Fidelity in Virtual Staining
        </div>

        <!-- Author list (already truncated with "..." in the generated data). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>José Teixeira, Pascal Klöckner, Diana Montezuma, Melis Erdal Cesur, João Fraga, ...
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及医学领域的虚拟染色技术，属于明确的无关主题（医学/生物学应用）。对抗学习虽然是通用技术，但该论文专注于病理学应用，与推荐系统、搜索或广告领域没有任何关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T09:56:35">2025-11-24 09:56:35</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18946v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18946v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- Literal ampersands in the abstract are escaped as &amp; so the text is never parsed as a character reference. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                In addition to evaluating tumor morphology using H&amp;E staining, immunohistochemistry is used to assess the presence of specific proteins within the tissue. However, this is a costly and labor-intensive technique, for which virtual staining, as an image-to-image translation task, offers a promising alternative. Although recent, this is an emerging field of research with 64% of published studies just in 2024. Most studies use publicly available datasets of H&amp;E-IHC pairs from consecutive tissue sections. Recognizing the training challenges, many authors develop complex virtual staining models based on conditional Generative Adversarial Networks, but ignore the impact of adversarial loss on the quality of virtual staining. Furthermore, overlooking the issues of model evaluation, they claim improved performance based on metrics such as SSIM and PSNR, which are not sufficiently robust to evaluate the quality of virtually stained images. In this paper, we developed CSSP2P GAN, which we demonstrate to achieve heightened pathological fidelity through a blind pathological expert evaluation. Furthermore, while iteratively developing our model, we study the impact of the adversarial loss and demonstrate its crucial role in the quality of virtually stained images. Finally, while comparing our model with reference works in the field, we underscore the limitations of the currently used evaluation metrics and demonstrate the superior performance of CSSP2P GAN.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18922v1" target="_blank" rel="noopener noreferrer">
                One4D：通过解耦LoRA控制实现统一的4D生成与重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            One4D: Unified 4D Generation and Reconstruction via Decoupled LoRA Control
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhenxing Mi, Yuxin Wang, Dan Xu
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于4D生成和重建技术，属于计算机视觉和图形学领域，与推荐系统、搜索或广告没有直接关联。解耦LoRA控制虽然是Transformer架构的效率技术，但应用于4D数据而非推荐系统相关的序列或异构数据处理，因此相关性极低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T09:31:23">2025-11-24 09:31:23</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18922v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18922v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                We present One4D, a unified framework for 4D generation and reconstruction that produces dynamic 4D content as synchronized RGB frames and pointmaps. By consistently handling varying sparsities of conditioning frames through a Unified Masked Conditioning (UMC) mechanism, One4D can seamlessly transition between 4D generation from a single image, 4D reconstruction from a full video, and mixed generation and reconstruction from sparse frames. Our framework adapts a powerful video generation model for joint RGB and pointmap generation, with carefully designed network architectures. The commonly used diffusion finetuning strategies for depthmap or pointmap reconstruction often fail on joint RGB and pointmap generation, quickly degrading the base video model. To address this challenge, we introduce Decoupled LoRA Control (DLC), which employs two modality-specific LoRA adapters to form decoupled computation branches for RGB frames and pointmaps, connected by lightweight, zero-initialized control links that gradually learn mutual pixel-level consistency. Trained on a mixture of synthetic and real 4D datasets under modest computational budgets, One4D produces high-quality RGB frames and accurate pointmaps across both generation and reconstruction tasks. This work represents a step toward general, high-quality geometry-based 4D world modeling using video diffusion models. Project page: https://mizhenxing.github.io/One4D
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18921v1" target="_blank" rel="noopener noreferrer">
                BackdoorVLM：针对视觉语言模型后门攻击的基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            BackdoorVLM: A Benchmark for Backdoor Attacks on Vision-Language Models
        </div>

        <!-- Author list (already truncated with "..." in the generated data). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Juncheng Li, Yige Li, Hanxun Huang, Yunhao Chen, Xin Wang, Yixu Wang, Xingjun Ma...
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言模型的后门攻击和基准测试，属于安全领域的研究。虽然提到了视觉语言模型，但核心关注点是安全攻击而非技术应用或架构创新。这与您明确排除的安全、隐私等非技术主题直接冲突，且没有展示在推荐系统、搜索或广告中的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T09:30:38">2025-11-24 09:30:38</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18921v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18921v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- LaTeX residue from the source abstract (\textbf{...}, \%) is rendered here as plain text. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Backdoor attacks undermine the reliability and trustworthiness of machine learning systems by injecting hidden behaviors that can be maliciously activated at inference time. While such threats have been extensively studied in unimodal settings, their impact on multimodal foundation models, particularly vision-language models (VLMs), remains largely underexplored. In this work, we introduce BackdoorVLM, the first comprehensive benchmark for systematically evaluating backdoor attacks on VLMs across a broad range of settings. It adopts a unified perspective that injects and analyzes backdoors across core vision-language tasks, including image captioning and visual question answering. BackdoorVLM organizes multimodal backdoor threats into 5 representative categories: targeted refusal, malicious injection, jailbreak, concept substitution, and perceptual hijack. Each category captures a distinct pathway through which an adversary can manipulate a model's behavior. We evaluate these threats using 12 representative attack methods spanning text, image, and bimodal triggers, tested on 2 open-source VLMs and 3 multimodal datasets. Our analysis reveals that VLMs exhibit strong sensitivity to textual instructions, and in bimodal backdoors the text trigger typically overwhelms the image trigger when forming the backdoor mapping. Notably, backdoors involving the textual modality remain highly potent, with poisoning rates as low as 1% yielding over 90% success across most tasks. These findings highlight significant, previously underexplored vulnerabilities in current VLMs. We hope that BackdoorVLM can serve as a useful benchmark for analyzing and mitigating multimodal backdoor threats. Code is available at: https://github.com/bin015/BackdoorVLM .
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18900v1" target="_blank" rel="noopener noreferrer">
                MatMart：基于扩散模型的3D物体材质重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            MatMart: Material Reconstruction of 3D Objects via Diffusion
        </div>

        <!-- Author list (already truncated with "..." in the generated data). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiuchao Wu, Pengfei Zhu, Jiangjing Lyu, Xinguo Liu, Jie Guo, Yanwen Guo, Weiwei ...
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D物体的材质重建和扩散模型应用，属于纯粹的计算机视觉和3D图形学领域。虽然涉及扩散模型技术，但论文内容与推荐系统、搜索或广告的核心技术需求没有直接关联，也无法看出在异构数据处理或Transformer架构方面的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T08:58:14">2025-11-24 08:58:14</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18900v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18900v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.GR</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- NOTE(review): the source abstract contained the unexpanded LaTeX macro "\ttt"; it is replaced
                 here with the method name from the title ("MatMart"). Confirm against the paper. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Applying diffusion models to physically-based material estimation and generation has recently gained prominence. In this paper, we propose MatMart, a novel material reconstruction framework for 3D objects, offering the following advantages. First, MatMart adopts a two-stage reconstruction, starting with accurate material prediction from inputs and followed by prior-guided material generation for unobserved views, yielding high-fidelity results. Second, by utilizing progressive inference alongside the proposed view-material cross-attention (VMCA), MatMart enables reconstruction from an arbitrary number of input images, demonstrating strong scalability and flexibility. Finally, MatMart achieves both material prediction and generation capabilities through end-to-end optimization of a single diffusion model, without relying on additional pre-trained models, thereby exhibiting enhanced stability across various types of objects. Extensive experiments demonstrate that MatMart achieves superior performance in material reconstruction compared to existing methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template output for a single paper card (class "simple-paper-card").
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18894v1" target="_blank" rel="noopener noreferrer">
                MetaDCSeg：通过元动态中心加权的鲁棒医学图像分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative; hide them from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title. -->
        <div class="mb-2 text-base text-gray-700">
            MetaDCSeg: Robust Medical Image Segmentation via Meta Dynamic Center Weighting
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chenyu Mu, Guihai Chen, Xun Yang, Erkun Yang, Cheng Deng
        </div>

        <!-- Recommendation rationale text. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像分割领域，属于明确的医学应用范畴，与搜索、推荐、广告系统完全无关。论文标题中提到的元学习和动态中心加权技术虽然本身具有技术价值，但被应用于医学图像这一特定领域，且没有显示出任何在推荐系统、搜索或广告中的潜在应用可能性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Local date-time (no zone given in the data), exposed in machine-readable form. -->
            <time datetime="2025-11-24T08:51:02">2025-11-24 08:51:02</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18894v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18894v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget (closed by default). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Medical image segmentation is crucial for clinical applications, but it is frequently disrupted by noisy annotations and ambiguous anatomical boundaries, which lead to instability in model training. Existing methods typically rely on global noise assumptions or confidence-based sample selection, which inadequately mitigate the performance degradation caused by annotation noise, especially in challenging boundary regions. To address this issue, we propose MetaDCSeg, a robust framework that dynamically learns optimal pixel-wise weights to suppress the influence of noisy ground-truth labels while preserving reliable annotations. By explicitly modeling boundary uncertainty through a Dynamic Center Distance (DCD) mechanism, our approach utilizes weighted feature distances for foreground, background, and boundary centers, directing the model's attention toward hard-to-segment pixels near ambiguous boundaries. This strategy enables more precise handling of structural boundaries, which are often overlooked by existing methods, and significantly enhances segmentation performance. Extensive experiments across four benchmark datasets with varying noise levels demonstrate that MetaDCSeg consistently outperforms existing state-of-the-art methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for the compact card rendered once per non-featured ("normal") paper.
-->
<!-- Compact card for a non-featured paper. The collapsed-level-2 class hides the whole card through the page's inline stylesheet (display: none !important). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18888v1" target="_blank" rel="noopener noreferrer">
                MFmamba：一种基于状态空间模型的全色图像分辨率恢复多功能网络
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Collapsible body: original title, authors, recommendation rationale, metadata and abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MFmamba: A Multi-function Network for Panchromatic Image Resolution Restoration Based on State-Space Model
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qian Jiang, Qianqian Wang, Xin Jin, Michal Wozniak, Shaowen Yao, Wei Zhou
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像分辨率恢复任务，使用状态空间模型处理全色图像。虽然状态空间模型是序列建模的一种方法，但该工作纯粹针对视觉图像处理，没有展示与推荐系统、搜索或广告的潜在应用关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T08:44:04">2025-11-24 08:44:04</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18888v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18888v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Remote sensing images are becoming increasingly widespread in military, earth resource exploration. Because of the limitation of a single sensor, we can obtain high spatial resolution grayscale panchromatic (PAN) images and low spatial resolution color multispectral (MS) images. Therefore, an important issue is to obtain a color image with high spatial resolution when there is only a PAN image at the input. The existing methods improve spatial resolution using super-resolution (SR) technology and spectral recovery using colorization technology. However, the SR technique cannot improve the spectral resolution, and the colorization technique cannot improve the spatial resolution. Moreover, the pansharpening method needs two registered inputs and can not achieve SR. As a result, an integrated approach is expected. To solve the above problems, we designed a novel multi-function model (MFmamba) to realize the tasks of SR, spectral recovery, joint SR and spectral recovery through three different inputs. Firstly, MFmamba utilizes UNet++ as the backbone, and a Mamba Upsample Block (MUB) is combined with UNet++. Secondly, a Dual Pool Attention (DPA) is designed to replace the skip connection in UNet++. Finally, a Multi-scale Hybrid Cross Block (MHCB) is proposed for initial feature extraction. Many experiments show that MFmamba is competitive in evaluation metrics and visual results and performs well in the three tasks when only the input PAN image is used.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for the compact card rendered once per non-featured ("normal") paper.
-->
<!-- Compact card for a non-featured paper. The collapsed-level-2 class hides the whole card through the page's inline stylesheet (display: none !important). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18886v1" target="_blank" rel="noopener noreferrer">
                MagicWorld：基于交互式几何驱动的视频世界探索
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Collapsible body: original title, authors, recommendation rationale, metadata and abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MagicWorld: Interactive Geometry-driven Video World Exploration
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Guangyuan Li, Siming Zheng, Shuolin Xu, Jinwei Chen, Bo Li, Xiaobin Hu, Lei Zhao...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于视频世界探索和几何驱动交互，属于计算机视觉和图形学领域，与推荐系统、搜索或广告的核心技术无直接关联。标题中未提及任何与LLM、Transformer架构或推荐系统相关的技术要素，也不涉及异构数据建模等关注领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T08:41:28">2025-11-24 08:41:28</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18886v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18886v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Recent interactive video world model methods generate scene evolution conditioned on user instructions. Although they achieve impressive results, two key limitations remain. First, they fail to fully exploit the correspondence between instruction-driven scene motion and the underlying 3D geometry, which results in structural instability under viewpoint changes. Second, they easily forget historical information during multi-step interaction, resulting in error accumulation and progressive drift in scene semantics and structure. To address these issues, we propose MagicWorld, an interactive video world model that integrates 3D geometric priors and historical retrieval. MagicWorld starts from a single scene image, employs user actions to drive dynamic scene evolution, and autoregressively synthesizes continuous scenes. We introduce the Action-Guided 3D Geometry Module (AG3D), which constructs a point cloud from the first frame of each interaction and the corresponding action, providing explicit geometric constraints for viewpoint transitions and thereby improving structural consistency. We further propose History Cache Retrieval (HCR) mechanism, which retrieves relevant historical frames during generation and injects them as conditioning signals, helping the model utilize past scene information and mitigate error accumulation. Experimental results demonstrate that MagicWorld achieves notable improvements in scene stability and continuity across interaction iterations.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for the compact card rendered once per non-featured ("normal") paper.
-->
<!-- Compact card for a non-featured paper. The collapsed-level-2 class hides the whole card through the page's inline stylesheet (display: none !important). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2511.18882v1" target="_blank" rel="noopener noreferrer">
                用于太阳能光伏适用性的立面分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Collapsible body: original title, authors, recommendation rationale, metadata and abstract. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Facade Segmentation for Solar Photovoltaic Suitability
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ayca Duran, Christoph Waibel, Bernd Bickel, Iro Armeni, Arno Schlueter
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的立面分割，应用于太阳能光伏领域，与推荐系统、搜索或广告的核心领域完全无关。该技术没有明显的潜力应用于推荐系统、搜索或广告领域，属于纯粹的视觉应用研究。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-11-24T08:37:35">2025-11-24 08:37:35</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2511.18882v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2511.18882v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Building integrated photovoltaic (BIPV) facades represent a promising pathway towards urban decarbonization, especially where roof areas are insufficient and ground-mounted arrays are infeasible. Although machine learning-based approaches to support photovoltaic (PV) planning on rooftops are well researched, automated approaches for facades still remain scarce and oversimplified. This paper therefore presents a pipeline that integrates detailed information on the architectural composition of the facade to automatically identify suitable surfaces for PV application and estimate the solar energy potential. The pipeline fine-tunes SegFormer-B5 on the CMP Facades dataset and converts semantic predictions into facade-level PV suitability masks and PV panel layouts considering module sizes and clearances. Applied to a dataset of 373 facades with known dimensions from ten cities, the results show that installable BIPV potential is significantly lower than theoretical potential, thus providing valuable insights for reliable urban energy planning. With the growing availability of facade imagery, the proposed pipeline can be scaled to support BIPV planning in cities worldwide.
            </div>
        </details>
    </div>
</div>
        </div>
    </main>

    <!-- 加载论文数据和JavaScript逻辑 -->
    <script src="static/app.js"></script>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            /*
             * Sets up the collapse/expand UI for non-featured papers and the
             * "featured only" / "all papers" filter buttons.
             *
             * Every element lookup is null-checked: the page may be rendered
             * without a papers container or without the filter buttons, and a
             * missing element must not abort the remaining wiring.
             */
            const papersContainer = document.querySelector('#papers-container');
            const showSelectedButton = document.getElementById('show-selected');
            const showAllButton = document.getElementById('show-all');

            const ACTIVE_FILTER_CLASS = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
            const INACTIVE_FILTER_CLASS = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
            const ICON_EXPANDED_CLASS = 'expand-icon fa fa-eye cursor-pointer';
            const ICON_COLLAPSED_CLASS = 'expand-icon fa fa-eye-slash cursor-pointer';

            // Single place that decides how the eye icon looks for each state.
            function setIconState(iconElement, expanded) {
                iconElement.className = expanded ? ICON_EXPANDED_CLASS : ICON_COLLAPSED_CLASS;
                iconElement.style.marginRight = '8px';
            }

            // Lets a non-button element be focused and activated with Enter/Space.
            function makeKeyboardActivatable(element) {
                element.setAttribute('role', 'button');
                element.setAttribute('tabindex', '0');
                element.addEventListener('keydown', function(e) {
                    if (e.key === 'Enter' || e.key === ' ') {
                        e.preventDefault(); // Space would otherwise scroll the page
                        element.click();
                    }
                });
            }

            // Toggles one paper's details. Visibility is read from the computed
            // style so papers opened through the container's "expanded-all"
            // class (no inline style) are recognised as already expanded.
            function togglePaperDetails(paper, titleElement) {
                const details = paper.querySelector('.paper-details');
                if (!details) return;

                const isExpanded = window.getComputedStyle(details).display !== 'none';
                details.style.display = isExpanded ? 'none' : 'block';

                const iconElement = titleElement.querySelector('.expand-icon');
                if (iconElement) setIconState(iconElement, !isExpanded);
            }

            // Applies the bulk expanded/collapsed state: flips the container
            // class, drops per-paper inline overrides so the stylesheet decides
            // again, and brings every eye icon in line with the new state.
            function applyBulkExpansion(expanded) {
                if (!papersContainer) return;

                papersContainer.classList.toggle('expanded-all', expanded);
                papersContainer.querySelectorAll('.collapsed-level-1').forEach(paper => {
                    const details = paper.querySelector('.paper-details');
                    if (details) details.style.display = '';

                    const iconElement = paper.querySelector('.expand-icon');
                    if (iconElement) setIconState(iconElement, expanded);
                });
            }

            // Writes the "showing X of Y" counter, if the page has one.
            function updateDisplayCount(shown, total) {
                const displayCountElement = document.getElementById('display-count');
                if (displayCountElement) {
                    displayCountElement.textContent = `显示 ${shown} 篇论文 (共 ${total} 篇)`;
                }
            }

            // Shows or hides the bulk toggle and the low-score divider.
            function setBulkControlsVisible(visible) {
                const expandToggle = document.querySelector('.expand-toggle');
                if (expandToggle) {
                    expandToggle.style.display = visible ? 'block' : 'none';
                    if (visible) expandToggle.textContent = '展开全部非精选论文';
                }

                const papersDivider = document.querySelector('.papers-divider');
                if (papersDivider) papersDivider.style.display = visible ? 'block' : 'none';
            }

            if (papersContainer) {
                // Bulk expand/collapse control, placed just before the first non-featured paper.
                const expandAllButton = document.createElement('div');
                expandAllButton.className = 'expand-toggle';
                expandAllButton.textContent = '展开/折叠全部非精选论文';
                makeKeyboardActivatable(expandAllButton);
                expandAllButton.addEventListener('click', function() {
                    const expanded = !papersContainer.classList.contains('expanded-all');
                    applyBulkExpansion(expanded);
                    expandAllButton.textContent = expanded ? '收起全部非精选论文' : '展开全部非精选论文';
                });

                const normalPapers = papersContainer.querySelectorAll('.simple-paper-card');
                if (normalPapers.length > 0) {
                    // Insert relative to the card's own parent: insertBefore on the
                    // container would throw if the cards were wrapped in another element.
                    const firstNormalPaper = normalPapers[0];
                    firstNormalPaper.parentNode.insertBefore(expandAllButton, firstNormalPaper);
                }

                // Divider whose label reveals or hides the low-score papers.
                const divider = document.createElement('div');
                divider.className = 'papers-divider';

                const dividerLabel = document.createElement('div');
                dividerLabel.className = 'papers-divider-label';
                dividerLabel.textContent = '点击展开更多论文（评分较低）';
                makeKeyboardActivatable(dividerLabel);
                dividerLabel.addEventListener('click', function() {
                    const expanded = papersContainer.classList.toggle('expanded-level-2');
                    dividerLabel.textContent = expanded ?
                        '点击收起低分论文' : '点击展开更多论文（评分较低）';
                });
                divider.appendChild(dividerLabel);

                // The divider goes right after the last non-featured paper.
                if (normalPapers.length > 0) {
                    normalPapers[normalPapers.length - 1].insertAdjacentElement('afterend', divider);
                }
            }

            // Per-paper toggle: an eye icon in front of the title link, plus a
            // click on the title itself (but not on the link or the icon).
            document.querySelectorAll('.collapsed-level-1').forEach(paper => {
                const titleElement = paper.querySelector('h3');
                if (!titleElement) return;

                titleElement.style.cursor = 'pointer';

                const linkElement = titleElement.querySelector('a');
                if (linkElement) {
                    const iconElement = document.createElement('i');
                    setIconState(iconElement, false);
                    linkElement.parentNode.insertBefore(iconElement, linkElement);

                    iconElement.addEventListener('click', function(e) {
                        e.stopPropagation(); // keep the title handler from toggling a second time
                        togglePaperDetails(paper, titleElement);
                    });
                }

                titleElement.addEventListener('click', function(e) {
                    // Clicks on the link must navigate; clicks on the icon are handled above.
                    if (e.target.closest('a') || e.target.closest('.expand-icon')) return;
                    togglePaperDetails(paper, titleElement);
                });
            });

            // "Featured only" filter.
            if (showSelectedButton) {
                showSelectedButton.addEventListener('click', function() {
                    const selectedPapers = document.querySelectorAll('.paper-card');
                    const normalPapers = document.querySelectorAll('.simple-paper-card');

                    selectedPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });
                    normalPapers.forEach(paper => {
                        paper.style.display = 'none';
                    });

                    updateDisplayCount(selectedPapers.length, selectedPapers.length + normalPapers.length);

                    showSelectedButton.className = ACTIVE_FILTER_CLASS;
                    if (showAllButton) showAllButton.className = INACTIVE_FILTER_CLASS;

                    // The bulk controls only make sense while non-featured papers are listed.
                    setBulkControlsVisible(false);
                });
            }

            // "All papers" filter.
            if (showAllButton) {
                showAllButton.addEventListener('click', function() {
                    const allPapers = document.querySelectorAll('.paper-card, .simple-paper-card');
                    allPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });

                    // Back to the collapsed default; no-op when the container is missing.
                    applyBulkExpansion(false);

                    updateDisplayCount(allPapers.length, allPapers.length);

                    showAllButton.className = ACTIVE_FILTER_CLASS;
                    if (showSelectedButton) showSelectedButton.className = INACTIVE_FILTER_CLASS;

                    setBulkControlsVisible(true);
                });
            }
        });
    </script>
    <script>
    
    // Start the date-picker calendar once the DOM has been parsed. Any failure
    // inside initCalendar is logged instead of propagating out of the listener.
    document.addEventListener('DOMContentLoaded', function onDomReady() {
        try {
            console.log('Attempting to initialize calendar...');
            initCalendar();
        } catch (calendarError) {
            console.error('Error initializing calendar:', calendarError);
        }
    });
    
    /**
     * Wires up the date-picker calendar: the toggle button, month navigation,
     * and a month grid in which only dates listed in `availableDates` can be
     * activated (activation navigates to `arxiv_<YYYYMMDD>.html`).
     *
     * The date shown on the page (#current-date) is authoritative for the
     * initial selection and the initially displayed month. The selection
     * stored in localStorage is only a fallback when the page date cannot be
     * parsed, so a stale stored value never overrides the page being viewed.
     *
     * @throws {Error} When a required calendar element is missing from the page.
     */
    function initCalendar() {
        // Fail with a descriptive message rather than an opaque TypeError later on.
        const requireElement = (id) => {
            const element = document.getElementById(id);
            if (!element) {
                throw new Error('Calendar markup is incomplete: missing #' + id);
            }
            return element;
        };

        const toggleBtn = requireElement('date-picker-toggle');
        const datePicker = requireElement('date-picker');
        const calendarGrid = requireElement('calendar-grid');
        const prevMonthBtn = requireElement('prev-month');
        const nextMonthBtn = requireElement('next-month');
        const currentMonthEl = requireElement('current-month');
        const selectedDateText = requireElement('selected-date-text');
        const currentDateEl = requireElement('current-date');

        const pad2 = (value) => String(value).padStart(2, '0');

        // Extracts year/month/day from forms such as "2025年11月25日",
        // "2025/11/25", "2025-11-25" or "20251125". Returns a local-time Date,
        // or null when the text holds no valid calendar date.
        const parsePageDate = (text) => {
            const parts = text.match(/(\d{4})\D+(\d{1,2})\D+(\d{1,2})/) || text.match(/(\d{4})(\d{2})(\d{2})/);
            if (!parts) return null;

            const year = Number(parts[1]);
            const month = Number(parts[2]);
            const day = Number(parts[3]);
            const parsed = new Date(year, month - 1, day);
            // The Date constructor rolls out-of-range values over (e.g. month 13); reject those.
            const isRealDate = parsed.getFullYear() === year &&
                parsed.getMonth() === month - 1 &&
                parsed.getDate() === day;
            return isRealDate ? parsed : null;
        };

        // localStorage access can throw (for example when storage is blocked);
        // the calendar should still work without persistence.
        const readStored = (key) => {
            try {
                return localStorage.getItem(key);
            } catch (error) {
                return null;
            }
        };
        const persistSelection = (dateText, year, month) => {
            try {
                localStorage.setItem('selectedDate', dateText);
                localStorage.setItem('selectedYear', year.toString());
                localStorage.setItem('selectedMonth', month.toString());
            } catch (error) {
                console.warn('Unable to persist calendar selection:', error);
            }
        };

        // Dates (YYYYMMDD) for which a page exists.
        const availableDates = ["20251105","20251107","20251009","20251121","20251113","20251030","20251111","20251031","20251017","20251021","20251010","20251024","20251022","20251029","20251114","20251118","20251120","20251016","20251015","20251028","20251014","20251119","20251112","20251106","20251125","20251023"];

        // Month currently displayed; defaults to the current month when neither
        // the page date nor a stored selection is usable.
        const now = new Date();
        let displayYear = now.getFullYear();
        let displayMonth = now.getMonth();

        const pageDate = parsePageDate(currentDateEl.textContent.trim());
        const savedDate = readStored('selectedDate');

        if (pageDate) {
            displayYear = pageDate.getFullYear();
            displayMonth = pageDate.getMonth();
            const pageDateText = displayYear + '-' + pad2(displayMonth + 1) + '-' + pad2(pageDate.getDate());
            selectedDateText.textContent = pageDateText;
            persistSelection(pageDateText, displayYear, displayMonth);
        } else if (savedDate) {
            selectedDateText.textContent = savedDate;
            const savedYear = parseInt(readStored('selectedYear'), 10);
            const savedMonth = parseInt(readStored('selectedMonth'), 10);
            // Ignore corrupt values rather than rendering a "NaN" month.
            if (Number.isInteger(savedYear)) displayYear = savedYear;
            if (Number.isInteger(savedMonth) && savedMonth >= 0 && savedMonth <= 11) displayMonth = savedMonth;
        }

        // Toggle the calendar popup; it is re-rendered each time it opens.
        toggleBtn.addEventListener('click', (e) => {
            e.stopPropagation(); // keep the document-level handler from closing it immediately

            if (datePicker.classList.contains('hidden')) {
                datePicker.classList.remove('hidden');
                renderCalendar();
            } else {
                datePicker.classList.add('hidden');
            }
        });

        // A click anywhere outside the popup closes it.
        document.addEventListener('click', () => {
            if (!datePicker.classList.contains('hidden')) {
                datePicker.classList.add('hidden');
            }
        });

        // Clicks inside the popup must not reach the document-level handler.
        datePicker.addEventListener('click', (e) => {
            e.stopPropagation();
        });

        // Previous / next month navigation with year wrap-around.
        prevMonthBtn.addEventListener('click', () => {
            displayMonth--;
            if (displayMonth < 0) {
                displayMonth = 11;
                displayYear--;
            }
            renderCalendar();
        });

        nextMonthBtn.addEventListener('click', () => {
            displayMonth++;
            if (displayMonth > 11) {
                displayMonth = 0;
                displayYear++;
            }
            renderCalendar();
        });

        /**
         * Rebuilds the grid for displayYear/displayMonth: leading placeholders
         * up to the weekday of the 1st, then one cell per day.
         */
        function renderCalendar() {
            calendarGrid.innerHTML = '';

            const monthNames = ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月'];
            currentMonthEl.textContent = displayYear + '年' + monthNames[displayMonth];

            // Weekday of the 1st (0 = Sunday) gives the number of leading placeholders.
            const firstDayOfWeek = new Date(displayYear, displayMonth, 1).getDay();

            // Day 0 of the next month is the last day of this month.
            const daysInMonth = new Date(displayYear, displayMonth + 1, 0).getDate();

            for (let i = 0; i < firstDayOfWeek; i++) {
                const emptyDay = document.createElement('div');
                emptyDay.classList.add('py-1', 'text-gray-300');
                calendarGrid.appendChild(emptyDay);
            }

            // Midnight today, used to highlight the current day.
            const today = new Date();
            today.setHours(0, 0, 0, 0);

            for (let day = 1; day <= daysInMonth; day++) {
                const dayElement = document.createElement('div');
                const isToday = new Date(displayYear, displayMonth, day).getTime() === today.getTime();
                const dateStr = displayYear + pad2(displayMonth + 1) + pad2(day);
                const displayDateStr = displayYear + '-' + pad2(displayMonth + 1) + '-' + pad2(day);

                dayElement.textContent = day;

                const hasPapers = availableDates.includes(dateStr);

                if (hasPapers) {
                    dayElement.classList.add('py-1', 'cursor-pointer', 'hover:bg-gray-100', 'rounded', 'bg-blue-50', 'font-medium');

                    // Remember the selection, close the popup, and go to that date's page.
                    const openDate = () => {
                        console.log('Date clicked:', displayDateStr);
                        selectedDateText.textContent = displayDateStr;
                        persistSelection(displayDateStr, displayYear, displayMonth);
                        datePicker.classList.add('hidden');

                        const targetUrl = 'arxiv_' + dateStr + '.html';
                        window.location.href = targetUrl;
                    };
                    dayElement.addEventListener('click', openDate);

                    // Make the cell reachable and operable from the keyboard.
                    dayElement.setAttribute('role', 'button');
                    dayElement.setAttribute('tabindex', '0');
                    dayElement.addEventListener('keydown', (e) => {
                        if (e.key === 'Enter' || e.key === ' ') {
                            e.preventDefault();
                            openDate();
                        }
                    });
                } else {
                    // Dates without a page are greyed out and inert.
                    dayElement.classList.add('py-1', 'text-gray-400', 'cursor-not-allowed');
                }

                // Today's highlight replaces the light "has papers" background.
                if (isToday) {
                    dayElement.classList.remove('bg-blue-50');
                    dayElement.classList.add('bg-primary', 'text-white', 'font-bold', 'shadow');
                    if (!hasPapers) {
                        dayElement.classList.add('opacity-70');
                    }
                }

                // Outline the currently selected date.
                if (displayDateStr === selectedDateText.textContent) {
                    dayElement.classList.add('font-bold', 'border-2', 'border-primary', 'rounded-lg', 'shadow-md');
                }

                // Stronger tint for dates that have a page (other than today).
                if (hasPapers && !isToday) {
                    dayElement.classList.add('bg-blue-100', 'hover:bg-blue-200', 'transition-colors', 'duration-200');
                }

                calendarGrid.appendChild(dayElement);
            }
        }
    }
    </script>
    </body>

</html>